From 7671d2f1d5598cdf8685d19166fb7013279168d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 12 Mar 2022 00:45:26 +0100 Subject: [PATCH 001/150] Change timezone in stateless tests --- ...s_and_insert_without_explicit_database.sql | 4 +- .../0_stateless/00189_time_zones_long.sql | 144 ++++++------ .../00206_empty_array_to_single.sql | 2 +- tests/queries/0_stateless/00301_csv.sh | 4 +- ...4_json_each_row_input_with_noisy_fields.sh | 2 +- tests/queries/0_stateless/00502_sum_map.sql | 2 +- .../00506_shard_global_in_union.sql | 2 +- .../00512_fractional_time_zones.sh | 2 +- .../0_stateless/00515_enhanced_time_zones.sql | 18 +- tests/queries/0_stateless/00538_datediff.sql | 24 +- .../0_stateless/00561_storage_join.sql | 2 +- .../0_stateless/00718_format_datetime.sql | 2 +- .../00719_format_datetime_rand.sql | 2 +- .../00735_long_conditional.reference | 12 +- .../0_stateless/00735_long_conditional.sql | 96 ++++---- ...01_daylight_saving_time_hour_underflow.sql | 2 +- ...aving_time_shift_backwards_at_midnight.sql | 2 +- .../00825_protobuf_format_persons.sh | 6 +- .../0_stateless/00835_if_generic_case.sql | 24 +- .../00910_decimal_group_array_crash_3783.sql | 14 +- ...00921_datetime64_compatibility_long.python | 130 +++++------ ...21_datetime64_compatibility_long.reference | 212 +++++++++--------- .../00927_asof_join_other_types.sh | 2 +- .../00935_to_iso_week_first_year.sql | 2 +- .../0_stateless/00941_to_custom_week.sql | 10 +- .../0_stateless/00945_bloom_filter_index.sql | 46 ++-- .../01077_mutations_index_consistency.sh | 12 +- .../0_stateless/01087_storage_generate.sql | 2 +- .../01087_table_function_generate.reference | 4 +- .../01087_table_function_generate.sql | 16 +- .../0_stateless/01098_msgpack_format.sh | 2 +- .../01186_conversion_to_nullable.sql | 6 +- ...ter_rename_with_default_zookeeper_long.sql | 4 +- .../0_stateless/01269_toStartOfSecond.sql | 2 +- tests/queries/0_stateless/01273_arrow_load.sh | 2 +- .../0_stateless/01277_toUnixTimestamp64.sql | 12 +- .../0_stateless/01280_min_map_max_map.sql | 4 +- .../0_stateless/01307_orc_output_format.sh | 2 +- .../01379_with_fill_several_columns.sql | 8 +- ...396_negative_datetime_saturate_to_zero.sql | 2 +- .../01414_low_cardinality_nullable.sql | 30 +-- ..._parse_date_time_best_effort_timestamp.sql | 6 +- .../01440_to_date_monotonicity.sql | 8 +- .../01442_date_time_with_params.reference | 6 +- .../01442_date_time_with_params.sql | 6 +- .../01508_partition_pruning_long.queries | 38 ++-- .../01508_partition_pruning_long.reference | 4 +- .../01516_date_time_output_format.sql | 8 +- .../0_stateless/01582_any_join_supertype.sql | 2 +- .../01615_two_args_function_index_fix.sql | 2 +- .../0_stateless/01676_reinterpret_as.sql | 4 +- .../01691_DateTime64_clamp.reference | 26 +-- .../0_stateless/01691_DateTime64_clamp.sql | 26 +-- .../01692_DateTime64_from_DateTime.sql | 6 +- .../0_stateless/01698_fix_toMinute.reference | 2 +- .../0_stateless/01698_fix_toMinute.sql | 6 +- .../01699_timezoneOffset.reference | 4 +- .../0_stateless/01699_timezoneOffset.sql | 16 +- .../01702_toDateTime_from_string_clamping.sql | 8 +- ...732_more_consistent_datetime64_parsing.sql | 8 +- .../01734_datetime64_from_float.sql | 6 +- .../0_stateless/01761_round_year_bounds.sql | 2 +- .../0_stateless/01769_extended_range_2.sql | 2 +- .../01772_to_start_of_hour_align.sql | 4 +- ...ormatDateTime_DateTime64_century.reference | 24 +- ...1802_formatDateTime_DateTime64_century.sql | 24 +- .../01802_toDateTime64_large_values.reference | 6 +- 
.../01802_toDateTime64_large_values.sql | 6 +- tests/queries/0_stateless/01811_datename.sql | 8 +- .../0_stateless/01821_to_date_time_ubsan.sql | 4 +- .../0_stateless/01852_map_combinator.sql | 2 +- ...1867_support_datetime64_version_column.sql | 2 +- .../01868_order_by_fill_with_datetime64.sql | 4 +- .../0_stateless/01891_partition_hash.sql | 2 +- .../01891_partition_hash_no_long_int.sql | 2 +- .../0_stateless/01905_to_json_string.sql | 2 +- .../0_stateless/01921_datatype_date32.sql | 18 +- .../01925_date_date_time_comparison.sql | 4 +- .../01926_date_date_time_supertype.reference | 14 +- .../01926_date_date_time_supertype.sql | 10 +- ...nversion_between_date32_and_datetime64.sql | 2 +- .../02096_date_time_1970_saturation.sql | 24 +- .../02176_toStartOfWeek_overflow_pruning.sql | 2 +- .../02184_default_table_engine.sql | 4 +- .../alltypes_list.parquet.columns | 2 +- ...1.column-metadata-handling.parquet.columns | 2 +- .../00900_parquet_create_table_columns.py | 4 +- 87 files changed, 624 insertions(+), 624 deletions(-) diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index 2fd097b9538..f3130f24521 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -8,8 +8,8 @@ DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; DROP TABLE IF EXISTS test_view_filtered; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Europe/Moscow'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Europe/Moscow')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) ENGINE = Memory POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/00189_time_zones_long.sql b/tests/queries/0_stateless/00189_time_zones_long.sql index ecc5f62ed1d..a25c9c7a415 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.sql +++ b/tests/queries/0_stateless/00189_time_zones_long.sql @@ -1,12 +1,12 @@ -- Tags: long -/* timestamp 1419800400 == 2014-12-29 00:00:00 (Europe/Moscow) */ -/* timestamp 1412106600 == 2014-09-30 23:50:00 (Europe/Moscow) */ -/* timestamp 1420102800 == 2015-01-01 12:00:00 (Europe/Moscow) */ -/* timestamp 1428310800 == 2015-04-06 12:00:00 (Europe/Moscow) */ -/* timestamp 1436956200 == 2015-07-15 13:30:00 (Europe/Moscow) */ -/* timestamp 1426415400 == 2015-03-15 13:30:00 (Europe/Moscow) */ -/* timestamp 1549483055 == 2019-02-06 22:57:35 (Europe/Moscow) */ +/* timestamp 1419800400 == 2014-12-29 00:00:00 (Asia/Istanbul) */ +/* timestamp 1412106600 == 2014-09-30 23:50:00 (Asia/Istanbul) */ +/* timestamp 1420102800 == 
2015-01-01 12:00:00 (Asia/Istanbul) */ +/* timestamp 1428310800 == 2015-04-06 12:00:00 (Asia/Istanbul) */ +/* timestamp 1436956200 == 2015-07-15 13:30:00 (Asia/Istanbul) */ +/* timestamp 1426415400 == 2015-03-15 13:30:00 (Asia/Istanbul) */ +/* timestamp 1549483055 == 2019-02-06 22:57:35 (Asia/Istanbul) */ /* date 16343 == 2014-09-30 */ /* date 16433 == 2014-12-29 */ /* date 17933 == 2019-02-06 */ @@ -14,12 +14,12 @@ /* toStartOfDay */ SELECT 'toStartOfDay'; -SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toStartOfDay(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Paris'); SELECT toStartOfDay(toDateTime(1412106600), 'Europe/London'); SELECT toStartOfDay(toDateTime(1412106600), 'Asia/Tokyo'); SELECT toStartOfDay(toDateTime(1412106600), 'Pacific/Pitcairn'); -SELECT toStartOfDay(toDate(16343), 'Europe/Moscow'); +SELECT toStartOfDay(toDate(16343), 'Asia/Istanbul'); SELECT toStartOfDay(toDate(16343), 'Europe/Paris'); SELECT toStartOfDay(toDate(16343), 'Europe/London'); SELECT toStartOfDay(toDate(16343), 'Asia/Tokyo'); @@ -28,7 +28,7 @@ SELECT toStartOfDay(toDate(16343), 'Pacific/Pitcairn'); /* toMonday */ SELECT 'toMonday'; -SELECT toMonday(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toMonday(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toMonday(toDateTime(1419800400), 'Europe/Paris'); SELECT toMonday(toDateTime(1419800400), 'Europe/London'); SELECT toMonday(toDateTime(1419800400), 'Asia/Tokyo'); @@ -42,7 +42,7 @@ SELECT toMonday(toDate(16433)); /* toStartOfMonth */ SELECT 'toStartOfMonth'; -SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toStartOfMonth(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfMonth(toDateTime(1419800400), 'Asia/Tokyo'); @@ -56,7 +56,7 @@ SELECT toStartOfMonth(toDate(16433)); /* toStartOfQuarter */ SELECT 'toStartOfQuarter'; -SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toStartOfQuarter(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Paris'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/London'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Asia/Tokyo'); @@ -70,7 +70,7 @@ SELECT toStartOfQuarter(toDate(16343)); /* toStartOfYear */ SELECT 'toStartOfYear'; -SELECT toStartOfYear(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toStartOfYear(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfYear(toDateTime(1419800400), 'Asia/Tokyo'); @@ -84,7 +84,7 @@ SELECT toStartOfYear(toDate(16433)); /* toTime */ SELECT 'toTime'; -SELECT toString(toTime(toDateTime(1420102800), 'Europe/Moscow'), 'Europe/Moscow'), toString(toTime(toDateTime(1428310800), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toTime(toDateTime(1420102800), 'Asia/Istanbul'), 'Asia/Istanbul'), toString(toTime(toDateTime(1428310800), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/Paris'), 'Europe/Paris'), toString(toTime(toDateTime(1428310800), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/London'), 'Europe/London'), toString(toTime(toDateTime(1428310800), 'Europe/London'), 'Europe/London'); SELECT toString(toTime(toDateTime(1420102800), 'Asia/Tokyo'), 'Asia/Tokyo'), 
toString(toTime(toDateTime(1428310800), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -93,7 +93,7 @@ SELECT toString(toTime(toDateTime(1420102800), 'Pacific/Pitcairn'), 'Pacific/Pit /* toYear */ SELECT 'toYear'; -SELECT toYear(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toYear(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toYear(toDateTime(1412106600), 'Europe/Paris'); SELECT toYear(toDateTime(1412106600), 'Europe/London'); SELECT toYear(toDateTime(1412106600), 'Asia/Tokyo'); @@ -102,7 +102,7 @@ SELECT toYear(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMonth */ SELECT 'toMonth'; -SELECT toMonth(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toMonth(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toMonth(toDateTime(1412106600), 'Europe/London'); SELECT toMonth(toDateTime(1412106600), 'Asia/Tokyo'); @@ -111,7 +111,7 @@ SELECT toMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfMonth */ SELECT 'toDayOfMonth'; -SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDayOfMonth(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/London'); SELECT toDayOfMonth(toDateTime(1412106600), 'Asia/Tokyo'); @@ -120,7 +120,7 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ SELECT 'toDayOfWeek'; -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo'); @@ -129,7 +129,7 @@ SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toHour */ SELECT 'toHour'; -SELECT toHour(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toHour(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toHour(toDateTime(1412106600), 'Europe/Paris'); SELECT toHour(toDateTime(1412106600), 'Europe/London'); SELECT toHour(toDateTime(1412106600), 'Asia/Tokyo'); @@ -138,7 +138,7 @@ SELECT toHour(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMinute */ SELECT 'toMinute'; -SELECT toMinute(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toMinute(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toMinute(toDateTime(1412106600), 'Europe/Paris'); SELECT toMinute(toDateTime(1412106600), 'Europe/London'); SELECT toMinute(toDateTime(1412106600), 'Asia/Tokyo'); @@ -147,7 +147,7 @@ SELECT toMinute(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toSecond */ SELECT 'toSecond'; -SELECT toSecond(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toSecond(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toSecond(toDateTime(1412106600), 'Europe/Paris'); SELECT toSecond(toDateTime(1412106600), 'Europe/London'); SELECT toSecond(toDateTime(1412106600), 'Asia/Tokyo'); @@ -156,7 +156,7 @@ SELECT toSecond(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toStartOfMinute */ SELECT 'toStartOfMinute'; -SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -165,7 +165,7 @@ SELECT 
toString(toStartOfMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pa /* toStartOfFiveMinute */ SELECT 'toStartOfFiveMinute'; -SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -174,7 +174,7 @@ SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), /* toStartOfTenMinutes */ SELECT 'toStartOfTenMinutes'; -SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -183,7 +183,7 @@ SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Pacific/Pitcairn'), /* toStartOfFifteenMinutes */ SELECT 'toStartOfFifteenMinutes'; -SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -192,7 +192,7 @@ SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Pacific/Pitcair /* toStartOfHour */ SELECT 'toStartOfHour'; -SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -201,33 +201,33 @@ SELECT toString(toStartOfHour(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Paci /* toStartOfInterval */ SELECT 'toStartOfInterval'; -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 3 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 week, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), 
INTERVAL 2 week, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 6 week, 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 6 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 24 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 20 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 90 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 second, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 second, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 second, 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 3 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 week, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 week, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 6 week, 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 6 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT 
toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 24 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 20 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 90 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 second, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 second, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 second, 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toStartOfInterval(toDate(17933), INTERVAL 1 year); SELECT toStartOfInterval(toDate(17933), INTERVAL 2 year); SELECT toStartOfInterval(toDate(17933), INTERVAL 5 year); @@ -240,14 +240,14 @@ SELECT toStartOfInterval(toDate(17933), INTERVAL 5 month); SELECT toStartOfInterval(toDate(17933), INTERVAL 1 week); SELECT toStartOfInterval(toDate(17933), INTERVAL 2 week); SELECT toStartOfInterval(toDate(17933), INTERVAL 6 week); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 1 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 1 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 2 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Asia/Istanbul'), 'Asia/Istanbul'); /* toRelativeYearNum */ SELECT 'toRelativeYearNum'; -SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeYearNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeYearNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeYearNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeYearNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/London') - toRelativeYearNum(toDateTime(0), 'Europe/London'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeYearNum(toDateTime(0), 'Asia/Tokyo'); @@ -256,7 +256,7 @@ SELECT toRelativeYearNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeMonthNum */ SELECT 'toRelativeMonthNum'; -SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMonthNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeMonthNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeMonthNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMonthNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/London') - toRelativeMonthNum(toDateTime(0), 'Europe/London'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeMonthNum(toDateTime(0), 'Asia/Tokyo'); @@ -265,7 +265,7 @@ SELECT toRelativeMonthNum(toDateTime(1412106600), 'Pacific/Pitcairn') - 
toRelati /* toRelativeWeekNum */ SELECT 'toRelativeWeekNum'; -SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeWeekNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeWeekNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeWeekNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeWeekNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/London') - toRelativeWeekNum(toDateTime(0), 'Europe/London'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeWeekNum(toDateTime(0), 'Asia/Tokyo'); @@ -274,7 +274,7 @@ SELECT toRelativeWeekNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeDayNum */ SELECT 'toRelativeDayNum'; -SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeDayNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeDayNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeDayNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeDayNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/London') - toRelativeDayNum(toDateTime(0), 'Europe/London'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeDayNum(toDateTime(0), 'Asia/Tokyo'); @@ -284,7 +284,7 @@ SELECT toUInt16(toRelativeDayNum(toDateTime(1412106600), 'Pacific/Pitcairn') - t /* toRelativeHourNum */ SELECT 'toRelativeHourNum'; -SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeHourNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeHourNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeHourNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeHourNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/London') - toRelativeHourNum(toDateTime(0), 'Europe/London'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeHourNum(toDateTime(0), 'Asia/Tokyo'); @@ -293,7 +293,7 @@ SELECT toRelativeHourNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeMinuteNum */ SELECT 'toRelativeMinuteNum'; -SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMinuteNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeMinuteNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMinuteNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/London') - toRelativeMinuteNum(toDateTime(0), 'Europe/London'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeMinuteNum(toDateTime(0), 'Asia/Tokyo'); @@ -302,7 +302,7 @@ SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toRelativeSecondNum */ SELECT 'toRelativeSecondNum'; -SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeSecondNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeSecondNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeSecondNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeSecondNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/London') - toRelativeSecondNum(toDateTime(0), 'Europe/London'); SELECT 
toRelativeSecondNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeSecondNum(toDateTime(0), 'Asia/Tokyo'); @@ -311,13 +311,13 @@ SELECT toRelativeSecondNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toDate */ SELECT 'toDate'; -SELECT toDate(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDate(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDate(toDateTime(1412106600), 'Europe/Paris'); SELECT toDate(toDateTime(1412106600), 'Europe/London'); SELECT toDate(toDateTime(1412106600), 'Asia/Tokyo'); SELECT toDate(toDateTime(1412106600), 'Pacific/Pitcairn'); -SELECT toDate(1412106600, 'Europe/Moscow'); +SELECT toDate(1412106600, 'Asia/Istanbul'); SELECT toDate(1412106600, 'Europe/Paris'); SELECT toDate(1412106600, 'Europe/London'); SELECT toDate(1412106600, 'Asia/Tokyo'); @@ -328,7 +328,7 @@ SELECT toDate(16343); /* toString */ SELECT 'toString'; -SELECT toString(toDateTime(1436956200), 'Europe/Moscow'); +SELECT toString(toDateTime(1436956200), 'Asia/Istanbul'); SELECT toString(toDateTime(1436956200), 'Europe/Paris'); SELECT toString(toDateTime(1436956200), 'Europe/London'); SELECT toString(toDateTime(1436956200), 'Asia/Tokyo'); @@ -337,13 +337,13 @@ SELECT toString(toDateTime(1436956200), 'Pacific/Pitcairn'); /* toUnixTimestamp */ SELECT 'toUnixTimestamp'; -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Paris'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/London'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Asia/Tokyo'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Pacific/Pitcairn'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Europe/Paris'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Europe/London'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Tokyo'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Pacific/Pitcairn'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Paris'), 'Europe/Paris'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/London'), 'Europe/London'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Tokyo'), 'Asia/Tokyo'); diff --git a/tests/queries/0_stateless/00206_empty_array_to_single.sql b/tests/queries/0_stateless/00206_empty_array_to_single.sql index 0e3ff4f3537..85e8f82436d 100644 --- a/tests/queries/0_stateless/00206_empty_array_to_single.sql +++ b/tests/queries/0_stateless/00206_empty_array_to_single.sql @@ -1,5 +1,5 @@ SELECT emptyArrayToSingle(arrayFilter(x -> x != 99, arrayJoin([[1, 2], [99], [4, 5, 6]]))); -SELECT emptyArrayToSingle(emptyArrayString()), emptyArrayToSingle(emptyArrayDate()), emptyArrayToSingle(arrayFilter(x -> 0, [now('Europe/Moscow')])); +SELECT emptyArrayToSingle(emptyArrayString()), emptyArrayToSingle(emptyArrayDate()), emptyArrayToSingle(arrayFilter(x -> 0, [now('Asia/Istanbul')])); SELECT emptyArrayToSingle(range(number % 3)), diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 50c64b312a7..b2618343dc0 
100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -18,7 +18,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" 2016-01-02 01:02:03, "2" @@ -29,7 +29,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Europe/Moscow')), s Nullable(String)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL "2016-01-01 01:02:03",NUL diff --git a/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh b/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh index 91ca0d7d869..f559b9d75bd 100755 --- a/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh +++ b/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh @@ -26,7 +26,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS json_noisy" echo $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS json_each_row" -$CLICKHOUSE_CLIENT -q "CREATE TABLE json_each_row (d DateTime('Europe/Moscow')) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "CREATE TABLE json_each_row (d DateTime('Asia/Istanbul')) ENGINE = Memory" echo '{"d" : "2017-08-31 18:36:48", "t" : ""} {"d" : "1504193808", "t" : -1} {"d" : 1504193808, "t" : []} diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index 3ceb5b82952..acc87cc5f16 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -31,7 +31,7 @@ select sumMap(val, cnt) from ( SELECT [ CAST(1, 'UInt64') ] as val, [1] as cnt ) select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Float64') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('a', 'Enum16(\'a\'=1)') ] as val, [1] as cnt ); -select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Europe/Moscow\')') ] as val, [1] as cnt ); +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Asia/Istanbul\')') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Date') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('01234567-89ab-cdef-0123-456789abcdef', 'UUID') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1] as cnt ); diff --git a/tests/queries/0_stateless/00506_shard_global_in_union.sql b/tests/queries/0_stateless/00506_shard_global_in_union.sql index b3009add7e5..e51c18c5678 100644 --- a/tests/queries/0_stateless/00506_shard_global_in_union.sql +++ b/tests/queries/0_stateless/00506_shard_global_in_union.sql @@ -22,7 +22,7 @@ DROP TABLE IF EXISTS union_bug; CREATE TABLE union_bug ( Event String, - Datetime DateTime('Europe/Moscow') + Datetime DateTime('Asia/Istanbul') ) Engine = Memory; INSERT INTO union_bug VALUES ('A', 1), ('B', 2); diff --git a/tests/queries/0_stateless/00512_fractional_time_zones.sh b/tests/queries/0_stateless/00512_fractional_time_zones.sh index 45be8fe8d17..eb459d22704 100755 --- a/tests/queries/0_stateless/00512_fractional_time_zones.sh +++ 
b/tests/queries/0_stateless/00512_fractional_time_zones.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TZ=Europe/Moscow ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" +TZ=Asia/Istanbul ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" TZ=Asia/Colombo ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" TZ=Asia/Kathmandu ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" diff --git a/tests/queries/0_stateless/00515_enhanced_time_zones.sql b/tests/queries/0_stateless/00515_enhanced_time_zones.sql index cae487dfab6..e555b6b87bc 100644 --- a/tests/queries/0_stateless/00515_enhanced_time_zones.sql +++ b/tests/queries/0_stateless/00515_enhanced_time_zones.sql @@ -1,6 +1,6 @@ -SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 1, 'Asia/Kolkata'); +SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 1, 'Asia/Kolkata'); SELECT addMonths(toDateTime('2017-11-05 10:37:47', 'Asia/Kolkata'), 1); -SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'), 1); +SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'), 1); SELECT addMonths(toDateTime('2017-11-05 08:07:47'), 1); SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), 1); @@ -12,11 +12,11 @@ SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), -1); SELECT addMonths(toDateTime('2017-11-05 08:07:47'), materialize(-1)); SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), materialize(-1)); -SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Europe/Moscow'); -SELECT toUnixTimestamp(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Istanbul'); +SELECT toUnixTimestamp(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Istanbul'); -SELECT toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'); -SELECT toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'); -SELECT toString(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow')); -SELECT toString(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata')); -SELECT toString(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'); +SELECT toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'); +SELECT toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'); +SELECT toString(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul')); +SELECT toString(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata')); +SELECT toString(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'); diff --git a/tests/queries/0_stateless/00538_datediff.sql b/tests/queries/0_stateless/00538_datediff.sql index b76ab4ff3f8..5dc416ad0bd 100644 --- a/tests/queries/0_stateless/00538_datediff.sql +++ b/tests/queries/0_stateless/00538_datediff.sql @@ -43,12 +43,12 @@ SELECT DATEDIFF('year', today(), today() - INTERVAL 10 YEAR); SELECT 'Dependance of timezones'; -SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('minute', 
toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); +SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); @@ -57,12 +57,12 @@ SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('second', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('day', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('hour', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('minute', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); +SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('day', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('hour', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('minute', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); diff --git a/tests/queries/0_stateless/00561_storage_join.sql b/tests/queries/0_stateless/00561_storage_join.sql index ad4fab09c20..913ecec6f4a 100644 --- a/tests/queries/0_stateless/00561_storage_join.sql +++ b/tests/queries/0_stateless/00561_storage_join.sql @@ -1,7 +1,7 @@ drop table IF EXISTS joinbug; CREATE TABLE joinbug ( - event_date Date MATERIALIZED toDate(created, 'Europe/Moscow'), + event_date Date MATERIALIZED toDate(created, 'Asia/Istanbul'), id UInt64, id2 UInt64, val UInt64, diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index cd679fe9735..899e23f9da1 100644 --- 
a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -42,4 +42,4 @@ SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), 'no formatting pattern' SELECT formatDateTime(toDate('2018-01-01'), '%F %T'); SELECT formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'UTC'), - formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'Europe/Moscow') \ No newline at end of file + formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'Asia/Istanbul') \ No newline at end of file diff --git a/tests/queries/0_stateless/00719_format_datetime_rand.sql b/tests/queries/0_stateless/00719_format_datetime_rand.sql index b91c988e1ca..ee596912bc7 100644 --- a/tests/queries/0_stateless/00719_format_datetime_rand.sql +++ b/tests/queries/0_stateless/00719_format_datetime_rand.sql @@ -8,4 +8,4 @@ WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) W -- Note: in some other timezones, daylight saving time change happens in midnight, so the first time of day is 01:00:00 instead of 00:00:00. --- Stick to Moscow timezone to avoid this issue. +-- Stick to a timezone whose daylight saving time change does not happen at midnight to avoid this issue. -WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %T', 'Europe/Moscow') != toString(toDateTime(t, 'Europe/Moscow')); +WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %T', 'Asia/Istanbul') != toString(toDateTime(t, 'Asia/Istanbul')); diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index 082c2d49de9..f6c06e64066 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -92,9 +92,9 @@ value vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') +1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') column vs value 0 1 1 Int8 Int8 Int8 0 1 1 Int8 Int16 Int16 @@ -189,6 +189,6 @@ column vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') +1970-01-01
03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') diff --git a/tests/queries/0_stateless/00735_long_conditional.sql b/tests/queries/0_stateless/00735_long_conditional.sql index 73febad1310..662c87db48f 100644 --- a/tests/queries/0_stateless/00735_long_conditional.sql +++ b/tests/queries/0_stateless/00735_long_conditional.sql @@ -15,7 +15,7 @@ SELECT toInt8(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt8(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -31,7 +31,7 @@ SELECT toInt16(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt16(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -47,7 +47,7 @@ SELECT toInt32(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt32(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -63,7 +63,7 @@ SELECT toInt64(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt64(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -79,7 +79,7 @@ SELECT toUInt8(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toUInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt8(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -95,7 +95,7 @@ SELECT toUInt16(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt16(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -111,7 +111,7 @@ SELECT toUInt32(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt32(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -127,7 +127,7 @@ SELECT toUInt64(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt64(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -143,26 +143,26 @@ SELECT toDate(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toDate(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDate(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt16(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT 'column vs value'; @@ -177,7 +177,7 @@ SELECT materialize(toInt8(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt8(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -193,7 +193,7 @@ SELECT materialize(toInt16(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt16(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -209,7 +209,7 @@ SELECT materialize(toInt32(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt32(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -225,7 +225,7 @@ SELECT materialize(toInt64(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt64(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -241,7 +241,7 @@ SELECT materialize(toUInt8(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toUInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt8(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -257,7 +257,7 @@ SELECT materialize(toUInt16(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt16(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -273,7 +273,7 @@ SELECT materialize(toUInt32(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt32(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -289,7 +289,7 @@ SELECT materialize(toUInt64(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt64(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -305,23 +305,23 @@ SELECT materialize(toDate(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toDate(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDate(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt64(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } diff --git a/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql b/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql index fa98d087a62..a86a863124c 100644 --- a/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql +++ b/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql @@ -1,6 +1,6 @@ -- See comment in DateLUTImpl.cpp: "We doesn't support cases when time change results in switching to previous day..." SELECT - ignore(toDateTime(370641600, 'Europe/Moscow') AS t), + ignore(toDateTime(370641600, 'Asia/Istanbul') AS t), replaceRegexpAll(toString(t), '\\d', 'x'), toHour(t) < 24, replaceRegexpAll(formatDateTime(t, '%Y-%m-%d %H:%M:%S; %R:%S; %F %T'), '\\d', 'x'); diff --git a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql index 4244ce2039b..1fdd9b20b2b 100644 --- a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql +++ b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql @@ -1,3 +1,3 @@ -- concat with empty string to defeat injectiveness of toString assumption. -SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Europe/Moscow') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; +SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Asia/Istanbul') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; SELECT concat('', toString(toDateTime('2018-09-19 00:00:00', 'Asia/Tehran') + INTERVAL number * 300 SECOND)) AS k FROM numbers(1000) GROUP BY k HAVING count() > 1 ORDER BY k; diff --git a/tests/queries/0_stateless/00825_protobuf_format_persons.sh b/tests/queries/0_stateless/00825_protobuf_format_persons.sh index bb376e6ed70..fe53e2995f9 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_persons.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_persons.sh @@ -28,7 +28,7 @@ CREATE TABLE persons_00825 (uuid UUID, photo Nullable(String), phoneNumber Nullable(FixedString(13)), isOnline UInt8, - visitTime Nullable(DateTime('Europe/Moscow')), + visitTime Nullable(DateTime('Asia/Istanbul')), age UInt8, zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823, 'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 'capricorn'=1222, 'aquarius'=120, @@ -47,9 +47,9 @@ CREATE TABLE persons_00825 (uuid UUID, "nestiness_a_B.c_E" Array(UInt32) ) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO persons_00825 VALUES (toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00', 'Europe/Moscow'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]); +INSERT INTO persons_00825 VALUES 
(toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00', 'Asia/Istanbul'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]); INSERT INTO persons_00825 VALUES (toUUID('c694ad8a-f714-4ea3-907d-fd54fb25d9b5'), 'Natalia', 'Sokolova', 'female', toDate('1992-03-08'), 'jpg', NULL, 0, NULL, 26, 'pisces', [], [100, 200, 50], 'Plymouth', [50.403724, -4.142123], 3.14159, NULL, 0.007, 5.4, -20000000000000, [], [], NULL, []); -INSERT INTO persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00', 'Europe/Moscow'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 503, []); +INSERT INTO persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00', 'Asia/Istanbul'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 503, []); SELECT * FROM persons_00825 ORDER BY name; EOF diff --git a/tests/queries/0_stateless/00835_if_generic_case.sql b/tests/queries/0_stateless/00835_if_generic_case.sql index 011cea46ffc..63baffcf17d 100644 --- a/tests/queries/0_stateless/00835_if_generic_case.sql +++ b/tests/queries/0_stateless/00835_if_generic_case.sql @@ -1,17 +1,17 @@ -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, 0 ? 
x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; SELECT rand() % 2 = 0 ? number : number FROM numbers(5); diff --git a/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql b/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql index cf0e0bac3dd..c6151d3bdee 100644 --- a/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql +++ b/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql @@ -8,7 +8,7 @@ select groupArray(s) from (select sum(n) s from (select toDecimal128(number, 10) DROP TABLE IF EXISTS sensor_value; CREATE TABLE sensor_value ( - received_at DateTime('Europe/Moscow'), + received_at DateTime('Asia/Istanbul'), device_id UUID, sensor_id UUID, value Nullable(Decimal(18, 4)), @@ -35,28 +35,28 @@ DROP TABLE sensor_value; select s.a, s.b, max(s.dt1) dt1, s.c, s.d, s.f, s.i, max(s.dt2) dt2 from ( select toUInt64(4360430) a , toUInt64(5681495) b - , toDateTime('2018-11-01 10:44:58', 'Europe/Moscow') dt1 + , toDateTime('2018-11-01 10:44:58', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('274.350000000000', 12) d , toDecimal128(268.970000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 union all select toUInt64(4341757) a , toUInt64(5657967) b - , toDateTime('2018-11-01 16:47:46', 'Europe/Moscow') dt1 + , toDateTime('2018-11-01 16:47:46', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('321.380000000000', 12) d , toDecimal128(315.080000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 union all select toUInt64(4360430) a , toUInt64(5681495) b - , toDateTime('2018-11-02 09:00:07', 'Europe/Moscow') dt1 + , toDateTime('2018-11-02 09:00:07', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('274.350000000000', 12) d , toDecimal128(268.970000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 ) s group by s.a, s.b, s.c, s.d, s.f, s.i ORDER BY s.a, s.b, s.c, s.d, s.f, s.i; diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index 
03cc088fd87..e4dbc716d7e 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -9,69 +9,69 @@ import argparse # Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. FUNCTIONS=""" toTimeZone(N, 'UTC') -toYear(N, 'Europe/Moscow') -toQuarter(N, 'Europe/Moscow') -toMonth(N, 'Europe/Moscow') -toDayOfYear(N, 'Europe/Moscow') -toDayOfMonth(N, 'Europe/Moscow') -toDayOfWeek(N, 'Europe/Moscow') -toHour(N, 'Europe/Moscow') -toMinute(N, 'Europe/Moscow') -toSecond(N, 'Europe/Moscow') +toYear(N, 'Asia/Istanbul') +toQuarter(N, 'Asia/Istanbul') +toMonth(N, 'Asia/Istanbul') +toDayOfYear(N, 'Asia/Istanbul') +toDayOfMonth(N, 'Asia/Istanbul') +toDayOfWeek(N, 'Asia/Istanbul') +toHour(N, 'Asia/Istanbul') +toMinute(N, 'Asia/Istanbul') +toSecond(N, 'Asia/Istanbul') toUnixTimestamp(N) -toStartOfYear(N, 'Europe/Moscow') -toStartOfISOYear(N, 'Europe/Moscow') -toStartOfQuarter(N, 'Europe/Moscow') -toStartOfMonth(N, 'Europe/Moscow') -toMonday(N, 'Europe/Moscow') -toStartOfWeek(N, 'Europe/Moscow') -toStartOfDay(N, 'Europe/Moscow') -toStartOfHour(N, 'Europe/Moscow') -toStartOfMinute(N, 'Europe/Moscow') -toStartOfFiveMinute(N, 'Europe/Moscow') -toStartOfTenMinutes(N, 'Europe/Moscow') -toStartOfFifteenMinutes(N, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 year, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 month, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 day, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 15 minute, 'Europe/Moscow') -date_trunc('year', N, 'Europe/Moscow') -date_trunc('month', N, 'Europe/Moscow') -date_trunc('day', N, 'Europe/Moscow') -date_trunc('minute', N, 'Europe/Moscow') -toTime(N, 'Europe/Moscow') -toRelativeYearNum(N, 'Europe/Moscow') -toRelativeQuarterNum(N, 'Europe/Moscow') -toRelativeMonthNum(N, 'Europe/Moscow') -toRelativeWeekNum(N, 'Europe/Moscow') -toRelativeDayNum(N, 'Europe/Moscow') -toRelativeHourNum(N, 'Europe/Moscow') -toRelativeMinuteNum(N, 'Europe/Moscow') -toRelativeSecondNum(N, 'Europe/Moscow') -toISOYear(N, 'Europe/Moscow') -toISOWeek(N, 'Europe/Moscow') -toWeek(N, 'Europe/Moscow') -toYearWeek(N, 'Europe/Moscow') -timeSlot(N, 'Europe/Moscow') -toYYYYMM(N, 'Europe/Moscow') -toYYYYMMDD(N, 'Europe/Moscow') -toYYYYMMDDhhmmss(N, 'Europe/Moscow') -addYears(N, 1, 'Europe/Moscow') -addMonths(N, 1, 'Europe/Moscow') -addWeeks(N, 1, 'Europe/Moscow') -addDays(N, 1, 'Europe/Moscow') -addHours(N, 1, 'Europe/Moscow') -addMinutes(N, 1, 'Europe/Moscow') -addSeconds(N, 1, 'Europe/Moscow') -addQuarters(N, 1, 'Europe/Moscow') -subtractYears(N, 1, 'Europe/Moscow') -subtractMonths(N, 1, 'Europe/Moscow') -subtractWeeks(N, 1, 'Europe/Moscow') -subtractDays(N, 1, 'Europe/Moscow') -subtractHours(N, 1, 'Europe/Moscow') -subtractMinutes(N, 1, 'Europe/Moscow') -subtractSeconds(N, 1, 'Europe/Moscow') -subtractQuarters(N, 1, 'Europe/Moscow') +toStartOfYear(N, 'Asia/Istanbul') +toStartOfISOYear(N, 'Asia/Istanbul') +toStartOfQuarter(N, 'Asia/Istanbul') +toStartOfMonth(N, 'Asia/Istanbul') +toMonday(N, 'Asia/Istanbul') +toStartOfWeek(N, 'Asia/Istanbul') +toStartOfDay(N, 'Asia/Istanbul') +toStartOfHour(N, 'Asia/Istanbul') +toStartOfMinute(N, 'Asia/Istanbul') +toStartOfFiveMinute(N, 'Asia/Istanbul') +toStartOfTenMinutes(N, 'Asia/Istanbul') +toStartOfFifteenMinutes(N, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 year, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 month, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 day, 'Asia/Istanbul') 
+toStartOfInterval(N, INTERVAL 15 minute, 'Asia/Istanbul') +date_trunc('year', N, 'Asia/Istanbul') +date_trunc('month', N, 'Asia/Istanbul') +date_trunc('day', N, 'Asia/Istanbul') +date_trunc('minute', N, 'Asia/Istanbul') +toTime(N, 'Asia/Istanbul') +toRelativeYearNum(N, 'Asia/Istanbul') +toRelativeQuarterNum(N, 'Asia/Istanbul') +toRelativeMonthNum(N, 'Asia/Istanbul') +toRelativeWeekNum(N, 'Asia/Istanbul') +toRelativeDayNum(N, 'Asia/Istanbul') +toRelativeHourNum(N, 'Asia/Istanbul') +toRelativeMinuteNum(N, 'Asia/Istanbul') +toRelativeSecondNum(N, 'Asia/Istanbul') +toISOYear(N, 'Asia/Istanbul') +toISOWeek(N, 'Asia/Istanbul') +toWeek(N, 'Asia/Istanbul') +toYearWeek(N, 'Asia/Istanbul') +timeSlot(N, 'Asia/Istanbul') +toYYYYMM(N, 'Asia/Istanbul') +toYYYYMMDD(N, 'Asia/Istanbul') +toYYYYMMDDhhmmss(N, 'Asia/Istanbul') +addYears(N, 1, 'Asia/Istanbul') +addMonths(N, 1, 'Asia/Istanbul') +addWeeks(N, 1, 'Asia/Istanbul') +addDays(N, 1, 'Asia/Istanbul') +addHours(N, 1, 'Asia/Istanbul') +addMinutes(N, 1, 'Asia/Istanbul') +addSeconds(N, 1, 'Asia/Istanbul') +addQuarters(N, 1, 'Asia/Istanbul') +subtractYears(N, 1, 'Asia/Istanbul') +subtractMonths(N, 1, 'Asia/Istanbul') +subtractWeeks(N, 1, 'Asia/Istanbul') +subtractDays(N, 1, 'Asia/Istanbul') +subtractHours(N, 1, 'Asia/Istanbul') +subtractMinutes(N, 1, 'Asia/Istanbul') +subtractSeconds(N, 1, 'Asia/Istanbul') +subtractQuarters(N, 1, 'Asia/Istanbul') CAST(N as DateTime('Europe/Minsk')) CAST(N as Date) CAST(N as UInt64) @@ -80,10 +80,10 @@ CAST(N as DateTime64(3, 'Europe/Minsk')) CAST(N as DateTime64(6, 'Europe/Minsk')) CAST(N as DateTime64(9, 'Europe/Minsk')) # Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer. -# CAST(N as DateTime64(12, 'Europe/Moscow')) +# CAST(N as DateTime64(12, 'Asia/Istanbul')) # DateTime64(18) will always fail due to zero precision, but it is Ok to test here: -# CAST(N as DateTime64(18, 'Europe/Moscow')) -formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Europe/Moscow') +# CAST(N as DateTime64(18, 'Asia/Istanbul')) +formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul') """.splitlines() # Expanded later to cartesian product of all arguments, using format string. 
diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 230bfa0c117..fa21fae0121 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -3,47 +3,47 @@ Code: 43 "DateTime('UTC')","2019-09-16 16:20:11" "DateTime64(3, 'UTC')","2019-09-16 16:20:11.234" ------------------------------------------ -SELECT toYear(N, \'Europe/Moscow\') +SELECT toYear(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toQuarter(N, \'Europe/Moscow\') +SELECT toQuarter(N, \'Asia/Istanbul\') "UInt8",3 "UInt8",3 "UInt8",3 ------------------------------------------ -SELECT toMonth(N, \'Europe/Moscow\') +SELECT toMonth(N, \'Asia/Istanbul\') "UInt8",9 "UInt8",9 "UInt8",9 ------------------------------------------ -SELECT toDayOfYear(N, \'Europe/Moscow\') +SELECT toDayOfYear(N, \'Asia/Istanbul\') "UInt16",259 "UInt16",259 "UInt16",259 ------------------------------------------ -SELECT toDayOfMonth(N, \'Europe/Moscow\') +SELECT toDayOfMonth(N, \'Asia/Istanbul\') "UInt8",16 "UInt8",16 "UInt8",16 ------------------------------------------ -SELECT toDayOfWeek(N, \'Europe/Moscow\') +SELECT toDayOfWeek(N, \'Asia/Istanbul\') "UInt8",1 "UInt8",1 "UInt8",1 ------------------------------------------ -SELECT toHour(N, \'Europe/Moscow\') +SELECT toHour(N, \'Asia/Istanbul\') Code: 43 "UInt8",19 "UInt8",19 ------------------------------------------ -SELECT toMinute(N, \'Europe/Moscow\') +SELECT toMinute(N, \'Asia/Istanbul\') Code: 43 "UInt8",20 "UInt8",20 ------------------------------------------ -SELECT toSecond(N, \'Europe/Moscow\') +SELECT toSecond(N, \'Asia/Istanbul\') Code: 43 "UInt8",11 "UInt8",11 @@ -53,269 +53,269 @@ Code: 44 "UInt32",1568650811 "UInt32",1568650811 ------------------------------------------ -SELECT toStartOfYear(N, \'Europe/Moscow\') +SELECT toStartOfYear(N, \'Asia/Istanbul\') Code: 43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT toStartOfISOYear(N, \'Europe/Moscow\') +SELECT toStartOfISOYear(N, \'Asia/Istanbul\') Code: 43 "Date","2018-12-31" "Date","2018-12-31" ------------------------------------------ -SELECT toStartOfQuarter(N, \'Europe/Moscow\') +SELECT toStartOfQuarter(N, \'Asia/Istanbul\') Code: 43 "Date","2019-07-01" "Date","2019-07-01" ------------------------------------------ -SELECT toStartOfMonth(N, \'Europe/Moscow\') +SELECT toStartOfMonth(N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT toMonday(N, \'Europe/Moscow\') +SELECT toMonday(N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-16" "Date","2019-09-16" ------------------------------------------ -SELECT toStartOfWeek(N, \'Europe/Moscow\') +SELECT toStartOfWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT toStartOfDay(N, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT toStartOfDay(N, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" ------------------------------------------ -SELECT toStartOfHour(N, \'Europe/Moscow\') +SELECT toStartOfHour(N, 
\'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" ------------------------------------------ -SELECT toStartOfMinute(N, \'Europe/Moscow\') +SELECT toStartOfMinute(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfFiveMinute(N, \'Europe/Moscow\') +SELECT toStartOfFiveMinute(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfTenMinutes(N, \'Europe/Moscow\') +SELECT toStartOfTenMinutes(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfFifteenMinutes(N, \'Europe/Moscow\') +SELECT toStartOfFifteenMinutes(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 year, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 1 year, \'Asia/Istanbul\') Code: 43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 month, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 1 month, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 day, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT toStartOfInterval(N, INTERVAL 1 day, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 15 minute, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 15 minute, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" ------------------------------------------ -SELECT date_trunc(\'year\', N, \'Europe/Moscow\') +SELECT date_trunc(\'year\', N, \'Asia/Istanbul\') Code: 43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT date_trunc(\'month\', N, \'Europe/Moscow\') +SELECT date_trunc(\'month\', N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT date_trunc(\'day\', N, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 
00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT date_trunc(\'day\', N, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" ------------------------------------------ -SELECT date_trunc(\'minute\', N, \'Europe/Moscow\') +SELECT date_trunc(\'minute\', N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toTime(N, \'Europe/Moscow\') +SELECT toTime(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","1970-01-02 19:20:11" -"DateTime('Europe/Moscow')","1970-01-02 19:20:11" +"DateTime('Asia/Istanbul')","1970-01-02 19:20:11" +"DateTime('Asia/Istanbul')","1970-01-02 19:20:11" ------------------------------------------ -SELECT toRelativeYearNum(N, \'Europe/Moscow\') +SELECT toRelativeYearNum(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toRelativeQuarterNum(N, \'Europe/Moscow\') +SELECT toRelativeQuarterNum(N, \'Asia/Istanbul\') "UInt32",8078 "UInt32",8078 "UInt32",8078 ------------------------------------------ -SELECT toRelativeMonthNum(N, \'Europe/Moscow\') +SELECT toRelativeMonthNum(N, \'Asia/Istanbul\') "UInt32",24237 "UInt32",24237 "UInt32",24237 ------------------------------------------ -SELECT toRelativeWeekNum(N, \'Europe/Moscow\') +SELECT toRelativeWeekNum(N, \'Asia/Istanbul\') "UInt32",2594 "UInt32",2594 "UInt32",2594 ------------------------------------------ -SELECT toRelativeDayNum(N, \'Europe/Moscow\') +SELECT toRelativeDayNum(N, \'Asia/Istanbul\') "UInt32",18155 "UInt32",18155 "UInt32",18155 ------------------------------------------ -SELECT toRelativeHourNum(N, \'Europe/Moscow\') +SELECT toRelativeHourNum(N, \'Asia/Istanbul\') "UInt32",435717 "UInt32",435736 "UInt32",435736 ------------------------------------------ -SELECT toRelativeMinuteNum(N, \'Europe/Moscow\') +SELECT toRelativeMinuteNum(N, \'Asia/Istanbul\') "UInt32",26143020 "UInt32",26144180 "UInt32",26144180 ------------------------------------------ -SELECT toRelativeSecondNum(N, \'Europe/Moscow\') +SELECT toRelativeSecondNum(N, \'Asia/Istanbul\') "UInt32",1568581200 "UInt32",1568650811 "UInt32",1568650811 ------------------------------------------ -SELECT toISOYear(N, \'Europe/Moscow\') +SELECT toISOYear(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toISOWeek(N, \'Europe/Moscow\') +SELECT toISOWeek(N, \'Asia/Istanbul\') "UInt8",38 "UInt8",38 "UInt8",38 ------------------------------------------ -SELECT toWeek(N, \'Europe/Moscow\') +SELECT toWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT toYearWeek(N, \'Europe/Moscow\') +SELECT toYearWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT timeSlot(N, \'Europe/Moscow\') +SELECT timeSlot(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" ------------------------------------------ -SELECT toYYYYMM(N, \'Europe/Moscow\') +SELECT toYYYYMM(N, \'Asia/Istanbul\') "UInt32",201909 
"UInt32",201909 "UInt32",201909 ------------------------------------------ -SELECT toYYYYMMDD(N, \'Europe/Moscow\') +SELECT toYYYYMMDD(N, \'Asia/Istanbul\') "UInt32",20190916 "UInt32",20190916 "UInt32",20190916 ------------------------------------------ -SELECT toYYYYMMDDhhmmss(N, \'Europe/Moscow\') +SELECT toYYYYMMDDhhmmss(N, \'Asia/Istanbul\') "UInt64",20190916000000 "UInt64",20190916192011 "UInt64",20190916192011 ------------------------------------------ -SELECT addYears(N, 1, \'Europe/Moscow\') +SELECT addYears(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2020-09-16 19:20:11" +"DateTime('Asia/Istanbul')","2020-09-16 19:20:11" Code: 43 ------------------------------------------ -SELECT addMonths(N, 1, \'Europe/Moscow\') +SELECT addMonths(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-10-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-10-16 19:20:11" Code: 43 ------------------------------------------ -SELECT addWeeks(N, 1, \'Europe/Moscow\') +SELECT addWeeks(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-23 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-23 19:20:11" Code: 43 ------------------------------------------ -SELECT addDays(N, 1, \'Europe/Moscow\') +SELECT addDays(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-17 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-17 19:20:11" Code: 43 ------------------------------------------ -SELECT addHours(N, 1, \'Europe/Moscow\') +SELECT addHours(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 20:20:11" +"DateTime('Asia/Istanbul')","2019-09-16 20:20:11" Code: 43 ------------------------------------------ -SELECT addMinutes(N, 1, \'Europe/Moscow\') +SELECT addMinutes(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:21:11" +"DateTime('Asia/Istanbul')","2019-09-16 19:21:11" Code: 43 ------------------------------------------ -SELECT addSeconds(N, 1, \'Europe/Moscow\') +SELECT addSeconds(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:12" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:12" Code: 43 ------------------------------------------ -SELECT addQuarters(N, 1, \'Europe/Moscow\') +SELECT addQuarters(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-12-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-12-16 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractYears(N, 1, \'Europe/Moscow\') +SELECT subtractYears(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2018-09-16 19:20:11" +"DateTime('Asia/Istanbul')","2018-09-16 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractMonths(N, 1, \'Europe/Moscow\') +SELECT subtractMonths(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-08-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-08-16 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractWeeks(N, 1, \'Europe/Moscow\') +SELECT subtractWeeks(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-09 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-09 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractDays(N, 1, \'Europe/Moscow\') +SELECT subtractDays(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-15 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-15 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractHours(N, 1, \'Europe/Moscow\') +SELECT subtractHours(N, 
1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 18:20:11" +"DateTime('Asia/Istanbul')","2019-09-16 18:20:11" Code: 43 ------------------------------------------ -SELECT subtractMinutes(N, 1, \'Europe/Moscow\') +SELECT subtractMinutes(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:19:11" +"DateTime('Asia/Istanbul')","2019-09-16 19:19:11" Code: 43 ------------------------------------------ -SELECT subtractSeconds(N, 1, \'Europe/Moscow\') +SELECT subtractSeconds(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:10" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:10" Code: 43 ------------------------------------------ -SELECT subtractQuarters(N, 1, \'Europe/Moscow\') +SELECT subtractQuarters(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-06-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-06-16 19:20:11" Code: 43 ------------------------------------------ SELECT CAST(N as DateTime(\'Europe/Minsk\')) @@ -353,7 +353,7 @@ SELECT CAST(N as DateTime64(9, \'Europe/Minsk\')) "DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.000000000" "DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.234000000" ------------------------------------------ -SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\', \'Europe/Moscow\') +SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\', \'Asia/Istanbul\') "String","20 16 09/16/19 16 2019-09-16 00 12 259 09 00 AM 00:00 00 00:00:00 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh index 0c17ca2085f..10173a3e43f 100755 --- a/tests/queries/0_stateless/00927_asof_join_other_types.sh +++ b/tests/queries/0_stateless/00927_asof_join_other_types.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Europe/Moscow')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Europe/Moscow')" +for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')" do $CLICKHOUSE_CLIENT -mn <= 5 LIMIT 15); -INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +CREATE TABLE bloom_filter_array_types_test (order_key Array(UInt64), i8 Array(Int8), i16 Array(Int16), i32 Array(Int32), i64 Array(Int64), u8 Array(UInt8), u16 Array(UInt16), u32 Array(UInt32), u64 Array(UInt64), f32 Array(Float32), f64 Array(Float64), date Array(Date), date_time Array(DateTime('Asia/Istanbul')), str Array(String), fixed_string Array(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, 
groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(i8, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(i16, 1); @@ -68,7 +68,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -83,7 +83,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -98,7 +98,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -106,8 +106,8 @@ DROP TABLE IF EXISTS bloom_filter_array_types_test; DROP TABLE IF EXISTS bloom_filter_null_types_test; -CREATE TABLE bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Europe/Moscow')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; -INSERT INTO bloom_filter_null_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS 
i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +CREATE TABLE bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Asia/Istanbul')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO bloom_filter_null_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Asia/Istanbul') AS date, toDateTime(number, 'Asia/Istanbul') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; INSERT INTO bloom_filter_null_types_test SELECT 0 AS order_key, NULL AS i8, NULL AS i16, NULL AS i32, NULL AS i64, NULL AS u8, NULL AS u16, NULL AS u32, NULL AS u64, NULL AS f32, NULL AS f64, NULL AS date, NULL AS date_time, NULL AS str, NULL AS fixed_string; SELECT COUNT() FROM bloom_filter_null_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; @@ -121,7 +121,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; @@ -177,7 +177,7 @@ CREATE TABLE bloom_filter_array_lc_null_types_test ( f64 Array(LowCardinality(Nullable(Float64))), date Array(LowCardinality(Nullable(Date))), - date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), + date_time Array(LowCardinality(Nullable(DateTime('Asia/Istanbul')))), str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))), @@ -197,16 +197,16 @@ SELECT groupArray(number) AS order_key, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS 
date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); INSERT INTO bloom_filter_array_lc_null_types_test SELECT n AS order_key, n AS i8, n AS i16, n AS i32, n AS i64, n AS u8, n AS u16, n AS u32, n AS u64, n AS f32, n AS f64, n AS date, n AS date_time, n AS str, n AS fixed_string FROM (SELECT [NULL] AS n); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] 
AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Europe/Moscow')] AS date, [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Asia/Istanbul')] AS date, [NULL, toDateTime(n, 'Asia/Istanbul')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(i8, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(i16, 1); @@ -219,7 +219,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -234,7 +234,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -249,7 +249,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -279,7 +279,7 @@ SELECT COUNT() FROM 
bloom_filter_array_lc_null_types_test WHERE has(u64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '100'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01077_mutations_index_consistency.sh b/tests/queries/0_stateless/01077_mutations_index_consistency.sh index 31086ed6784..c41eab62ecb 100755 --- a/tests/queries/0_stateless/01077_mutations_index_consistency.sh +++ b/tests/queries/0_stateless/01077_mutations_index_consistency.sh @@ -7,9 +7,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement" -$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Europe/Moscow')) Engine = MergeTree ORDER BY (toStartOfHour(date));" +$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date));" -$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Europe/Moscow') + number%(23*3600) from numbers(1000000);" +$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Asia/Istanbul') + number%(23*3600) from numbers(1000000);" $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE movement FINAL" @@ -18,20 +18,20 @@ SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "16:00:00" | cut -f1 -$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00', 'Europe/Moscow') and date < toDateTime('2020-01-22T17:00:00', 'Europe/Moscow') SETTINGS mutations_sync = 2" +$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00', 'Asia/Istanbul') and date < toDateTime('2020-01-22T17:00:00', 'Asia/Istanbul') SETTINGS mutations_sync = 2" $CLICKHOUSE_CLIENT -n --query " SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "16:00:00" | wc -l @@ -42,7 +42,7 @@ SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "22:00:00" | cut -f1 diff --git 
a/tests/queries/0_stateless/01087_storage_generate.sql b/tests/queries/0_stateless/01087_storage_generate.sql index a16ad55832c..7df9f3931d0 100644 --- a/tests/queries/0_stateless/01087_storage_generate.sql +++ b/tests/queries/0_stateless/01087_storage_generate.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Europe/Moscow'), UUID)) ENGINE=GenerateRandom(10, 5, 3); +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) ENGINE=GenerateRandom(10, 5, 3); SELECT * FROM test_table_2 LIMIT 100; diff --git a/tests/queries/0_stateless/01087_table_function_generate.reference b/tests/queries/0_stateless/01087_table_function_generate.reference index ead4e97403b..ef7eac41ca2 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/tests/queries/0_stateless/01087_table_function_generate.reference @@ -46,7 +46,7 @@ h \N o - -Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2113-06-12 2050-12-17 02:46:35 2096-02-16 22:18:22 2141-08-09 2013-10-17 23:35:26 1976-01-24 12:52:48 2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 @@ -58,7 +58,7 @@ Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 - -DateTime64(3, \'Europe/Moscow\') DateTime64(6, \'Europe/Moscow\') DateTime64(6, \'Europe/Moscow\') +DateTime64(3, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') 1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 1978-08-25 17:07:25.427 2034-05-02 20:49:42.148578 2015-08-26 15:26:31.783160 2037-04-04 10:50:56.898 2055-05-28 11:12:48.819271 2068-12-26 09:58:49.635722 diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index ef4311649f7..512121b7ecc 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -42,20 +42,20 @@ LIMIT 10; SELECT '-'; SELECT toTypeName(d), toTypeName(dt), toTypeName(dtm) -FROM generateRandom('d Date, dt DateTime(\'Europe/Moscow\'), dtm DateTime(\'Europe/Moscow\')') +FROM generateRandom('d Date, dt DateTime(\'Asia/Istanbul\'), dtm DateTime(\'Asia/Istanbul\')') LIMIT 1; SELECT d, dt, dtm -FROM generateRandom('d Date, dt DateTime(\'Europe/Moscow\'), dtm DateTime(\'Europe/Moscow\')', 1, 10, 10) +FROM generateRandom('d Date, dt DateTime(\'Asia/Istanbul\'), dtm DateTime(\'Asia/Istanbul\')', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) -FROM generateRandom('dt64 DateTime64(3, \'Europe/Moscow\'), dts64 DateTime64(6, \'Europe/Moscow\'), dtms64 DateTime64(6 ,\'Europe/Moscow\')') +FROM generateRandom('dt64 DateTime64(3, \'Asia/Istanbul\'), dts64 DateTime64(6, \'Asia/Istanbul\'), dtms64 DateTime64(6 ,\'Asia/Istanbul\')') LIMIT 1; SELECT dt64, dts64, dtms64 -FROM generateRandom('dt64 DateTime64(3, \'Europe/Moscow\'), dts64 DateTime64(6, \'Europe/Moscow\'), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1, 10, 10) +FROM generateRandom('dt64 DateTime64(3, \'Asia/Istanbul\'), dts64 DateTime64(6, \'Asia/Istanbul\'), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 1, 10, 10) LIMIT 10; SELECT toTypeName(d32) @@ -176,8 +176,8 @@ FROM generateRandom('i 
String', 1, 10, 10) LIMIT 10; SELECT '-'; DROP TABLE IF EXISTS test_table; -CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Europe/Moscow'), UUID)) ENGINE=Memory; -INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, \'Europe/Moscow\'), UUID)', 1, 10, 2) +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) ENGINE=Memory; +INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, \'Asia/Istanbul\'), UUID)', 1, 10, 2) LIMIT 10; SELECT * FROM test_table ORDER BY a, d, c; @@ -187,8 +187,8 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime('Europe/Moscow'), DateTime64(3, 'Europe/Moscow'), UUID), h FixedString(2)) ENGINE=Memory; -INSERT INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime(\'Europe/Moscow\'), DateTime64(3, \'Europe/Moscow\'), UUID), h FixedString(2)', 10, 5, 3) +CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime('Asia/Istanbul'), DateTime64(3, 'Asia/Istanbul'), UUID), h FixedString(2)) ENGINE=Memory; +INSERT INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime(\'Asia/Istanbul\'), DateTime64(3, \'Asia/Istanbul\'), UUID), h FixedString(2)', 10, 5, 3) LIMIT 10; SELECT a, b, c, d, e, f, g, hex(h) FROM test_table_2 ORDER BY a, b, c, d, e, f, g, h; diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index aa982c5478d..24638f33324 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -11,7 +11,7 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Europe/Moscow'), datetime64 DateTime64(3, 'Europe/Moscow'), array Array(UInt32)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Asia/Istanbul'), datetime64 DateTime64(3, 'Asia/Istanbul'), array Array(UInt32)) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, [1,2,3,4,5]), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, [5,4,3,2,1]), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, [42])"; diff --git 
a/tests/queries/0_stateless/01186_conversion_to_nullable.sql b/tests/queries/0_stateless/01186_conversion_to_nullable.sql index 828d3cac05b..52f7ee91f52 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.sql +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.sql @@ -2,9 +2,9 @@ select toUInt8(x) from values('x Nullable(String)', '42', NULL, '0', '', '256'); select toInt64(x) from values('x Nullable(String)', '42', NULL, '0', '', '256'); select toDate(x) from values('x Nullable(String)', '2020-12-24', NULL, '0000-00-00', '', '9999-01-01'); -select toDateTime(x, 'Europe/Moscow') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); -select toDateTime64(x, 2, 'Europe/Moscow') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); -select toUnixTimestamp(x, 'Europe/Moscow') from values ('x Nullable(String)', '2000-01-01 13:12:12', NULL, ''); +select toDateTime(x, 'Asia/Istanbul') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); +select toDateTime64(x, 2, 'Asia/Istanbul') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); +select toUnixTimestamp(x, 'Asia/Istanbul') from values ('x Nullable(String)', '2000-01-01 13:12:12', NULL, ''); select toDecimal32(x, 2) from values ('x Nullable(String)', '42', NULL, '3.14159'); select toDecimal64(x, 8) from values ('x Nullable(String)', '42', NULL, '3.14159'); diff --git a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql index a831fd18bfe..c5c1f2ebfd3 100644 --- a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql +++ b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql @@ -14,7 +14,7 @@ ENGINE = MergeTree() PARTITION BY date ORDER BY key; -INSERT INTO table_rename_with_default (date, key, value1) SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Europe/Moscow'), number, toString(number) from numbers(9); +INSERT INTO table_rename_with_default (date, key, value1) SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Asia/Istanbul'), number, toString(number) from numbers(9); SELECT * FROM table_rename_with_default WHERE key = 1 FORMAT TSVWithNames; @@ -44,7 +44,7 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_01213/table_rename_wit ORDER BY tuple() TTL date2 + INTERVAL 500 MONTH; -INSERT INTO table_rename_with_ttl SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Europe/Moscow'), toDateTime(toDate('2018-10-01') + number % 3, 'Europe/Moscow'), toString(number), toString(number) from numbers(9); +INSERT INTO table_rename_with_ttl SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Asia/Istanbul'), toDateTime(toDate('2018-10-01') + number % 3, 'Asia/Istanbul'), toString(number), toString(number) from numbers(9); SELECT * FROM table_rename_with_ttl WHERE value1 = '1' FORMAT TSVWithNames; diff --git a/tests/queries/0_stateless/01269_toStartOfSecond.sql b/tests/queries/0_stateless/01269_toStartOfSecond.sql index b74eaabf351..641da4a15a9 100644 --- a/tests/queries/0_stateless/01269_toStartOfSecond.sql +++ b/tests/queries/0_stateless/01269_toStartOfSecond.sql @@ -4,7 +4,7 @@ SELECT toStartOfSecond(now()); -- {serverError 43} SELECT toStartOfSecond(); -- {serverError 42} SELECT toStartOfSecond(now64(), 123); -- {serverError 43} -WITH toDateTime64('2019-09-16 19:20:11', 3, 'Europe/Moscow') AS dt64 SELECT 
toStartOfSecond(dt64, 'UTC') AS res, toTypeName(res); +WITH toDateTime64('2019-09-16 19:20:11', 3, 'Asia/Istanbul') AS dt64 SELECT toStartOfSecond(dt64, 'UTC') AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11', 0, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11.123', 3, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11.123', 9, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); diff --git a/tests/queries/0_stateless/01273_arrow_load.sh b/tests/queries/0_stateless/01273_arrow_load.sh index 2e213ce3a79..fa70255821a 100755 --- a/tests/queries/0_stateless/01273_arrow_load.sh +++ b/tests/queries/0_stateless/01273_arrow_load.sh @@ -12,7 +12,7 @@ CB_DIR=$(dirname "$CLICKHOUSE_CLIENT_BINARY") DATA_FILE=$CUR_DIR/data_arrow/test.arrow ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (bool UInt8, int8 Int8, int16 Int16, int32 Int32, int64 Int64, uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, halffloat Float32, float Float32, double Float64, string String, date32 Date, date64 DateTime('Europe/Moscow'), timestamp DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (bool UInt8, int8 Int8, int16 Int16, int32 Int32, int64 Int64, uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, halffloat Float32, float Float32, double Float64, string String, date32 Date, date64 DateTime('Asia/Istanbul'), timestamp DateTime('Asia/Istanbul')) ENGINE = Memory" cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "insert into arrow_load format Arrow" ${CLICKHOUSE_CLIENT} --query="select * from arrow_load" diff --git a/tests/queries/0_stateless/01277_toUnixTimestamp64.sql b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql index eb3e8c612ed..42de53beb66 100644 --- a/tests/queries/0_stateless/01277_toUnixTimestamp64.sql +++ b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql @@ -12,22 +12,22 @@ SELECT toUnixTimestamp64Micro('abc', 123); -- {serverError 42} SELECT toUnixTimestamp64Nano('abc', 123); -- {serverError 42} SELECT 'const column'; -WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); SELECT 'non-const column'; -WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), 
toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); diff --git a/tests/queries/0_stateless/01280_min_map_max_map.sql b/tests/queries/0_stateless/01280_min_map_max_map.sql index 96fdfc61929..9bc8c320d93 100644 --- a/tests/queries/0_stateless/01280_min_map_max_map.sql +++ b/tests/queries/0_stateless/01280_min_map_max_map.sql @@ -15,7 +15,7 @@ select minMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(Fixed select minMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); -select minMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(DateTime(\'Asia/Istanbul\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); select minMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); @@ -28,7 +28,7 @@ select maxMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(Fixed select maxMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); -select maxMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(DateTime(\'Asia/Istanbul\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); select maxMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh index 926398e55bd..b17792af051 100755 --- a/tests/queries/0_stateless/01307_orc_output_format.sh +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime('Europe/Moscow'), decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime('Asia/Istanbul'), decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT 
INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', '2020', 18980, 1639872000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', '2000', 20000, 1839882000, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', '4242', 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; diff --git a/tests/queries/0_stateless/01379_with_fill_several_columns.sql b/tests/queries/0_stateless/01379_with_fill_several_columns.sql index 505b9e0f8e1..6bdf7d41b57 100644 --- a/tests/queries/0_stateless/01379_with_fill_several_columns.sql +++ b/tests/queries/0_stateless/01379_with_fill_several_columns.sql @@ -1,6 +1,6 @@ SELECT - toDate(toDateTime((number * 10) * 86400, 'Europe/Moscow')) AS d1, - toDate(toDateTime(number * 86400, 'Europe/Moscow')) AS d2, + toDate(toDateTime((number * 10) * 86400, 'Asia/Istanbul')) AS d1, + toDate(toDateTime(number * 86400, 'Asia/Istanbul')) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 @@ -11,8 +11,8 @@ ORDER BY SELECT '==============='; SELECT - toDate(toDateTime((number * 10) * 86400, 'Europe/Moscow')) AS d1, - toDate(toDateTime(number * 86400, 'Europe/Moscow')) AS d2, + toDate(toDateTime((number * 10) * 86400, 'Asia/Istanbul')) AS d1, + toDate(toDateTime(number * 86400, 'Asia/Istanbul')) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 diff --git a/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql b/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql index 2711ab3cceb..e52c2d3dd1b 100644 --- a/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql +++ b/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql @@ -1 +1 @@ -SELECT toTimeZone(now(), 'Europe/Moscow') > '1970-01-01 00:00:00'; +SELECT toTimeZone(now(), 'Asia/Istanbul') > '1970-01-01 00:00:00'; diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index c11e990cea8..871d74d7fb9 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -15,7 +15,7 @@ CREATE TABLE lc_nullable ( f64 Array(LowCardinality(Nullable(Float64))), date Array(LowCardinality(Nullable(Date))), - date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), + date_time Array(LowCardinality(Nullable(DateTime('Asia/Istanbul')))), str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))) @@ -33,8 +33,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); @@ -51,8 +51,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(num)) AS u64, groupArray(toFloat32(num)) AS f32, groupArray(toFloat64(num)) AS f64, - groupArray(toDate(num, 'Europe/Moscow')) AS date, - groupArray(toDateTime(num, 'Europe/Moscow')) AS 
date_time, + groupArray(toDate(num, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(num, 'Asia/Istanbul')) AS date_time, groupArray(toString(num)) AS str, groupArray(toFixedString(toString(num), 5)) AS fixed_string FROM (SELECT negate(number) as num FROM system.numbers LIMIT 15); @@ -69,8 +69,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); @@ -87,8 +87,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); @@ -123,8 +123,8 @@ INSERT INTO lc_nullable SELECT [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, - [NULL, toDate(n, 'Europe/Moscow')] AS date, - [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, + [NULL, toDate(n, 'Asia/Istanbul')] AS date, + [NULL, toDateTime(n, 'Asia/Istanbul')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); @@ -140,7 +140,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 1); SELECT count() FROM lc_nullable WHERE has(f32, 1); SELECT count() FROM lc_nullable WHERE has(f64, 1); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-02')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '1'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('1', 5)); @@ -168,7 +168,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 5); SELECT count() FROM lc_nullable WHERE has(f32, 5); SELECT count() FROM lc_nullable WHERE has(f64, 5); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-06')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '5'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('5', 5)); @@ -183,7 +183,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 10); SELECT count() FROM lc_nullable WHERE has(f32, 10); SELECT count() FROM lc_nullable WHERE has(f64, 10); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE 
has(str, '10'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('10', 5)); @@ -213,7 +213,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 100); SELECT count() FROM lc_nullable WHERE has(f32, 100); SELECT count() FROM lc_nullable WHERE has(f64, 100); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-04-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '100'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql b/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql index 071fefe0403..58759c8585b 100644 --- a/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql +++ b/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql @@ -1,3 +1,3 @@ -SELECT parseDateTimeBestEffort('1596752940', 'Europe/Moscow'); -SELECT parseDateTimeBestEffort('100000000', 'Europe/Moscow'); -SELECT parseDateTimeBestEffort('20200807', 'Europe/Moscow'); +SELECT parseDateTimeBestEffort('1596752940', 'Asia/Istanbul'); +SELECT parseDateTimeBestEffort('100000000', 'Asia/Istanbul'); +SELECT parseDateTimeBestEffort('20200807', 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.sql b/tests/queries/0_stateless/01440_to_date_monotonicity.sql index 8843d7ffca6..b4c4f98b223 100644 --- a/tests/queries/0_stateless/01440_to_date_monotonicity.sql +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.sql @@ -1,11 +1,11 @@ DROP TABLE IF EXISTS tdm; DROP TABLE IF EXISTS tdm2; -CREATE TABLE tdm (x DateTime('Europe/Moscow')) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; +CREATE TABLE tdm (x DateTime('Asia/Istanbul')) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; INSERT INTO tdm VALUES (now()); -SELECT count(x) FROM tdm WHERE toDate(x) < toDate(now(), 'Europe/Moscow') SETTINGS max_rows_to_read = 1; +SELECT count(x) FROM tdm WHERE toDate(x) < toDate(now(), 'Asia/Istanbul') SETTINGS max_rows_to_read = 1; -SELECT toDate(-1), toDate(10000000000000, 'Europe/Moscow'), toDate(100), toDate(65536, 'UTC'), toDate(65535, 'Europe/Moscow'); -SELECT toDateTime(-1, 'Europe/Moscow'), toDateTime(10000000000000, 'Europe/Moscow'), toDateTime(1000, 'Europe/Moscow'); +SELECT toDate(-1), toDate(10000000000000, 'Asia/Istanbul'), toDate(100), toDate(65536, 'UTC'), toDate(65535, 'Asia/Istanbul'); +SELECT toDateTime(-1, 'Asia/Istanbul'), toDateTime(10000000000000, 'Asia/Istanbul'), toDateTime(1000, 'Asia/Istanbul'); CREATE TABLE tdm2 (timestamp UInt32) ENGINE = MergeTree ORDER BY timestamp SETTINGS index_granularity = 1; diff --git a/tests/queries/0_stateless/01442_date_time_with_params.reference b/tests/queries/0_stateless/01442_date_time_with_params.reference index 726e59d4d35..bc819d5f8fc 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.reference +++ b/tests/queries/0_stateless/01442_date_time_with_params.reference @@ -1,6 +1,6 @@ -2020-01-01 00:00:00 DateTime 2020-01-01 00:01:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:06:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:06:00 DateTime -2020-01-01 00:00:00 
DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime -2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:01:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:06:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:06:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:00:00 DateTime parseDateTimeBestEffort 2020-05-14 03:37:03.000 DateTime64(3, \'UTC\') diff --git a/tests/queries/0_stateless/01442_date_time_with_params.sql b/tests/queries/0_stateless/01442_date_time_with_params.sql index 5a57aabdb0c..aeb9aa597ef 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.sql +++ b/tests/queries/0_stateless/01442_date_time_with_params.sql @@ -1,14 +1,14 @@ DROP TABLE IF EXISTS test; -CREATE TABLE test (a DateTime, b DateTime(), c DateTime(2), d DateTime('Europe/Moscow'), e DateTime(3, 'Europe/Moscow'), f DateTime32, g DateTime32('Europe/Moscow'), h DateTime(0)) ENGINE = MergeTree ORDER BY a; +CREATE TABLE test (a DateTime, b DateTime(), c DateTime(2), d DateTime('Asia/Istanbul'), e DateTime(3, 'Asia/Istanbul'), f DateTime32, g DateTime32('Asia/Istanbul'), h DateTime(0)) ENGINE = MergeTree ORDER BY a; INSERT INTO test VALUES('2020-01-01 00:00:00', '2020-01-01 00:01:00', '2020-01-01 00:02:00.11', '2020-01-01 00:03:00', '2020-01-01 00:04:00.22', '2020-01-01 00:05:00', '2020-01-01 00:06:00', '2020-01-01 00:06:00'); SELECT a, toTypeName(a), b, toTypeName(b), c, toTypeName(c), d, toTypeName(d), e, toTypeName(e), f, toTypeName(f), g, toTypeName(g), h, toTypeName(h) FROM test; -SELECT toDateTime('2020-01-01 00:00:00') AS a, toTypeName(a), toDateTime('2020-01-01 00:02:00.11', 2) AS b, toTypeName(b), toDateTime('2020-01-01 00:03:00', 'Europe/Moscow') AS c, toTypeName(c), toDateTime('2020-01-01 00:04:00.22', 3, 'Europe/Moscow') AS d, toTypeName(d), toDateTime('2020-01-01 00:05:00', 0) AS e, toTypeName(e); +SELECT toDateTime('2020-01-01 00:00:00') AS a, toTypeName(a), toDateTime('2020-01-01 00:02:00.11', 2) AS b, toTypeName(b), toDateTime('2020-01-01 00:03:00', 'Asia/Istanbul') AS c, toTypeName(c), toDateTime('2020-01-01 00:04:00.22', 3, 'Asia/Istanbul') AS d, toTypeName(d), toDateTime('2020-01-01 00:05:00', 0) AS e, toTypeName(e); -SELECT CAST('2020-01-01 00:00:00', 'DateTime') AS a, toTypeName(a), CAST('2020-01-01 00:02:00.11', 'DateTime(2)') AS b, toTypeName(b), CAST('2020-01-01 00:03:00', 'DateTime(\'Europe/Moscow\')') AS c, toTypeName(c), CAST('2020-01-01 00:04:00.22', 'DateTime(3, \'Europe/Moscow\')') AS d, toTypeName(d), CAST('2020-01-01 00:05:00', 'DateTime(0)') AS e, toTypeName(e); +SELECT CAST('2020-01-01 00:00:00', 'DateTime') AS a, toTypeName(a), CAST('2020-01-01 00:02:00.11', 'DateTime(2)') AS b, toTypeName(b), CAST('2020-01-01 00:03:00', 
'DateTime(\'Asia/Istanbul\')') AS c, toTypeName(c), CAST('2020-01-01 00:04:00.22', 'DateTime(3, \'Asia/Istanbul\')') AS d, toTypeName(d), CAST('2020-01-01 00:05:00', 'DateTime(0)') AS e, toTypeName(e); SELECT toDateTime32('2020-01-01 00:00:00') AS a, toTypeName(a); diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.queries b/tests/queries/0_stateless/01508_partition_pruning_long.queries index 786240145a9..0d64fc05f0f 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.queries +++ b/tests/queries/0_stateless/01508_partition_pruning_long.queries @@ -2,20 +2,20 @@ DROP TABLE IF EXISTS tMM; DROP TABLE IF EXISTS tDD; DROP TABLE IF EXISTS sDD; DROP TABLE IF EXISTS xMM; -CREATE TABLE tMM(d DateTime('Europe/Moscow'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tMM; -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -CREATE TABLE tDD(d DateTime('Europe/Moscow'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE tDD(d DateTime('Asia/Istanbul'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tDD; -insert into tDD select toDateTime(toDate('2020-09-23'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDateTime('2020-09-23 11:00:00', 'Europe/Moscow')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-24'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-25'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-08-15'), 'Europe/Moscow'), number from numbers(10000); +insert into tDD select toDateTime(toDate('2020-09-23'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL select toDateTime(toDateTime('2020-09-23 11:00:00', 'Asia/Istanbul')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-24'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL 
select toDateTime(toDate('2020-09-25'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-08-15'), 'Asia/Istanbul'), number from numbers(10000); -CREATE TABLE sDD(d UInt64,a Int) ENGINE = MergeTree PARTITION BY toYYYYMM(toDate(intDiv(d,1000), 'Europe/Moscow')) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE sDD(d UInt64,a Int) ENGINE = MergeTree PARTITION BY toYYYYMM(toDate(intDiv(d,1000), 'Asia/Istanbul')) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES sDD; insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); @@ -24,14 +24,14 @@ insert into sDD select (1598918400+number*60)*1000, number from numbers(5000); insert into sDD select (1601510400+number*60)*1000, number from numbers(5000); insert into sDD select (1602720000+number*60)*1000, number from numbers(5000); -CREATE TABLE xMM(d DateTime('Europe/Moscow'),a Int64, f Int64) ENGINE = MergeTree PARTITION BY (toYYYYMM(d), a) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE xMM(d DateTime('Asia/Istanbul'),a Int64, f Int64) ENGINE = MergeTree PARTITION BY (toYYYYMM(d), a) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES xMM; -INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, 2, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, 3, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, 2, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, 3, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); SELECT '--------- tMM ----------------------------'; @@ -44,8 +44,8 @@ select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Europe/Moscow'); -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), 
count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference index 9cd208a336f..afdb4257505 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.reference +++ b/tests/queries/0_stateless/01508_partition_pruning_long.reference @@ -35,11 +35,11 @@ select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d 3 15000 Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); 6 30000 Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); 0 0 Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges diff --git a/tests/queries/0_stateless/01516_date_time_output_format.sql b/tests/queries/0_stateless/01516_date_time_output_format.sql index 224d8ef1035..3c99d1bb81b 100644 --- a/tests/queries/0_stateless/01516_date_time_output_format.sql +++ b/tests/queries/0_stateless/01516_date_time_output_format.sql @@ -1,16 +1,16 @@ DROP TABLE IF EXISTS test_datetime; -CREATE TABLE test_datetime(timestamp DateTime('Europe/Moscow')) ENGINE=Log; +CREATE TABLE test_datetime(timestamp DateTime('Asia/Istanbul')) ENGINE=Log; INSERT INTO test_datetime VALUES ('2020-10-15 00:00:00'); SET date_time_output_format = 'simple'; SELECT timestamp FROM test_datetime; -SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Europe/Moscow'), '%Y-%m-%d %R:%S') as formatted_simple FROM test_datetime; +SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul'), '%Y-%m-%d %R:%S') as formatted_simple FROM test_datetime; SET date_time_output_format = 'iso'; SELECT timestamp FROM test_datetime; -SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Europe/Moscow'), '%Y-%m-%dT%R:%SZ', 'UTC') as formatted_iso FROM test_datetime;; +SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul'), '%Y-%m-%dT%R:%SZ', 'UTC') as formatted_iso FROM test_datetime;; SET date_time_output_format = 'unix_timestamp'; SELECT timestamp FROM test_datetime; @@ -19,7 +19,7 @@ SELECT toUnixTimestamp(timestamp) FROM test_datetime; SET date_time_output_format = 'simple'; DROP TABLE test_datetime; -CREATE TABLE test_datetime(timestamp DateTime64(3, 'Europe/Moscow')) Engine=Log; +CREATE TABLE test_datetime(timestamp DateTime64(3, 'Asia/Istanbul')) Engine=Log; INSERT INTO test_datetime VALUES ('2020-10-15 00:00:00'), (1602709200123); diff --git a/tests/queries/0_stateless/01582_any_join_supertype.sql 
b/tests/queries/0_stateless/01582_any_join_supertype.sql index 6b06d78c83c..9cd7b4397ab 100644 --- a/tests/queries/0_stateless/01582_any_join_supertype.sql +++ b/tests/queries/0_stateless/01582_any_join_supertype.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS foo; DROP TABLE IF EXISTS bar; -CREATE TABLE foo (server_date Date, server_time Datetime('Europe/Moscow'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); +CREATE TABLE foo (server_date Date, server_time Datetime('Asia/Istanbul'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); CREATE TABLE bar (server_date Date, dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); INSERT INTO foo VALUES ('2020-01-01', '2020-01-01 12:00:00', 'test1'), ('2020-01-01', '2020-01-01 13:00:00', 'test2'); diff --git a/tests/queries/0_stateless/01615_two_args_function_index_fix.sql b/tests/queries/0_stateless/01615_two_args_function_index_fix.sql index dd2bde2eafc..6128bdfcdfb 100644 --- a/tests/queries/0_stateless/01615_two_args_function_index_fix.sql +++ b/tests/queries/0_stateless/01615_two_args_function_index_fix.sql @@ -1,6 +1,6 @@ drop table if exists bad_date_time; -create table bad_date_time (time Datetime('Europe/Moscow'), count UInt16) Engine = MergeTree() ORDER BY (time); +create table bad_date_time (time Datetime('Asia/Istanbul'), count UInt16) Engine = MergeTree() ORDER BY (time); insert into bad_date_time values('2020-12-20 20:59:52', 1), ('2020-12-20 21:59:52', 1), ('2020-12-20 01:59:52', 1); diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql index e8c2a0b1373..cc52859724d 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.sql +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -30,8 +30,8 @@ SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11' SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; SELECT 'Dates'; SELECT reinterpret(0, 'Date'), reinterpret('', 'Date'); -SELECT reinterpret(0, 'DateTime(''Europe/Moscow'')'), reinterpret('', 'DateTime(''Europe/Moscow'')'); -SELECT reinterpret(0, 'DateTime64(3, ''Europe/Moscow'')'), reinterpret('', 'DateTime64(3, ''Europe/Moscow'')'); +SELECT reinterpret(0, 'DateTime(''Asia/Istanbul'')'), reinterpret('', 'DateTime(''Asia/Istanbul'')'); +SELECT reinterpret(0, 'DateTime64(3, ''Asia/Istanbul'')'), reinterpret('', 'DateTime64(3, ''Asia/Istanbul'')'); SELECT 'Decimals'; SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)'); SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference index 41a8d653a3f..7b3b9ae04d6 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -1,27 +1,27 @@ -- { echo } -- These values are within the extended range of DateTime64 [1925-01-01, 2284-01-01) -SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +SELECT toTimeZone(toDateTime(-2, 2), 'Asia/Istanbul'); 1970-01-01 02:59:58.00 -SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +SELECT toDateTime64(-2, 2, 'Asia/Istanbul'); 1970-01-01 02:59:58.00 -SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST(-1 AS DateTime64(0, 'Asia/Istanbul')); 1970-01-01 
02:59:59 -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul')); 2020-01-01 00:00:00 -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; -SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +SELECT toDateTime64(-2., 2, 'Asia/Istanbul'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul'); 2106-02-07 09:28:16.00 -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null; -- These are outsize of extended range and hence clamped -SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); 1925-01-01 02:00:00.00 -SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); +SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); 1925-01-01 02:00:00.000 -SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); +SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); 2282-12-31 03:00:00.000 -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); 2282-12-31 03:00:00.00 diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql index 2786d9c1c09..b7077aff1f7 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql @@ -1,17 +1,17 @@ -- { echo } -- These values are within the extended range of DateTime64 [1925-01-01, 2284-01-01) -SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); -SELECT toDateTime64(-2, 2, 'Europe/Moscow'); -SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; -SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); -SELECT toDateTime64(-2., 2, 'Europe/Moscow'); -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2, 2), 'Asia/Istanbul'); +SELECT toDateTime64(-2, 2, 'Asia/Istanbul'); +SELECT CAST(-1 AS DateTime64(0, 'Asia/Istanbul')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul'); +SELECT toDateTime64(-2., 2, 'Asia/Istanbul'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul'); +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null; -- These are outsize of extended range and hence clamped -SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); -SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 
'Europe/Moscow')); -SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); +SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); +SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql index fac0c341007..c08062a456c 100644 --- a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql +++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql @@ -1,7 +1,7 @@ -select toDateTime64(toDateTime(1, 'Europe/Moscow'), 2); +select toDateTime64(toDateTime(1, 'Asia/Istanbul'), 2); select toDateTime64(toDate(1), 2) FORMAT Null; -- Unknown timezone select toDateTime64(toDateTime(1), 2) FORMAT Null; -- Unknown timezone -select toDateTime64(toDateTime(1), 2, 'Europe/Moscow'); -select toDateTime64(toDate(1), 2, 'Europe/Moscow'); +select toDateTime64(toDateTime(1), 2, 'Asia/Istanbul'); +select toDateTime64(toDate(1), 2, 'Asia/Istanbul'); select toDateTime64(toDateTime(1), 2, 'GMT'); select toDateTime64(toDate(1), 2, 'GMT'); diff --git a/tests/queries/0_stateless/01698_fix_toMinute.reference b/tests/queries/0_stateless/01698_fix_toMinute.reference index 7675aad3a57..eb1f7eb9ca1 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.reference +++ b/tests/queries/0_stateless/01698_fix_toMinute.reference @@ -19,6 +19,6 @@ Check the bug causing situation: the special Australia/Lord_Howe time zone. toDa 1554569400 2019-04-07 03:20:00 2019-04-07 03:20:00 1554570000 2019-04-07 03:30:00 2019-04-07 03:30:00 1554570600 2019-04-07 03:40:00 2019-04-07 03:40:00 -4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour: +4 days test in batch comparing with manually computation result for Asia/Istanbul whose timezone epoc is of whole hour: 4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour: diff --git a/tests/queries/0_stateless/01698_fix_toMinute.sql b/tests/queries/0_stateless/01698_fix_toMinute.sql index f582806719d..4d11efa901d 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.sql +++ b/tests/queries/0_stateless/01698_fix_toMinute.sql @@ -3,9 +3,9 @@ SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zo SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, toString(x) as xx FROM numbers(20); /* The Batch Part. 
Test period is whole 4 days*/ -SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour:'; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT '4 days test in batch comparing with manually computation result for Asia/Istanbul whose timezone epoc is of whole hour:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour:'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference index a1cc6391e6f..860829f0ce6 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.reference +++ b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -1,4 +1,4 @@ -DST boundary test for Europe/Moscow: +DST boundary test for Asia/Istanbul: 0 1981-04-01 22:40:00 14400 354998400 1 1981-04-01 22:50:00 14400 354999000 2 1981-04-01 23:00:00 14400 354999600 @@ -70,7 +70,7 @@ DST boundary test for Australia/Lord_Howe: 15 2019-04-07 03:00:00 37800 1554568200 16 2019-04-07 03:10:00 37800 1554568800 17 2019-04-07 03:20:00 37800 1554569400 -4 days test in batch comparing with manually computation result for Europe/Moscow: +4 days test in batch comparing with manually computation result for Asia/Istanbul: 4 days test in batch comparing with manually computation result for Asia/Tehran: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql index 8cabb23c4de..f9e6c2db970 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.sql +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -1,8 +1,8 @@ /* Test the DST(daylight saving time) offset changing boundary*/ -SELECT 'DST boundary test for Europe/Moscow:'; -SELECT number,(toDateTime('1981-04-01 22:40:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); -SELECT number,(toDateTime('1981-09-30 23:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); +SELECT 'DST boundary test for Asia/Istanbul:'; +SELECT number,(toDateTime('1981-04-01 22:40:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s 
FROM numbers(4); +SELECT number,(toDateTime('1981-09-30 23:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); SELECT 'DST boundary test for Asia/Tehran:'; SELECT number,(toDateTime('2020-03-21 22:40:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); @@ -18,9 +18,9 @@ SELECT number,(toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERV /* The Batch Part. Test period is whole 4 days*/ -SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow:'; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT '4 days test in batch comparing with manually computation result for Asia/Istanbul:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran:'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; @@ -34,9 +34,9 @@ SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/L /* Find all the years had followed DST during given period*/ SELECT 'Moscow DST Years:'; -SELECT number, (toDateTime('1970-06-01 00:00:00', 'Europe/Moscow') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800; +SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Istanbul') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800; SELECT 'Moscow DST Years with perment DST from 2011-2014:'; -SELECT min((toDateTime('2011-01-01 00:00:00', 'Europe/Moscow') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start; +SELECT min((toDateTime('2011-01-01 00:00:00', 'Asia/Istanbul') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start; SELECT 'Tehran DST Years:'; SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Tehran') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 12600; diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql index f51a1bb2280..b0dbd1dfc84 100644 --- a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql +++ 
b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql @@ -1,4 +1,4 @@ -SELECT toString(toDateTime('-922337203.6854775808', 1, 'Europe/Moscow')); -SELECT toString(toDateTime('9922337203.6854775808', 1, 'Europe/Moscow')); -SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1, 'Europe/Moscow'); -SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1, 'Europe/Moscow'); +SELECT toString(toDateTime('-922337203.6854775808', 1, 'Asia/Istanbul')); +SELECT toString(toDateTime('9922337203.6854775808', 1, 'Asia/Istanbul')); +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1, 'Asia/Istanbul'); +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql index 88859177a92..7e7fe3f2e16 100644 --- a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql @@ -5,7 +5,7 @@ INSERT INTO t VALUES (3, '1111111111222'); INSERT INTO t VALUES (4, '1111111111.222'); SELECT * FROM t ORDER BY i; -SELECT toDateTime64(1111111111.222, 3, 'Europe/Moscow'); -SELECT toDateTime64('1111111111.222', 3, 'Europe/Moscow'); -SELECT toDateTime64('1111111111222', 3, 'Europe/Moscow'); -SELECT ignore(toDateTime64(1111111111222, 3, 'Europe/Moscow')); -- This gives somewhat correct but unexpected result +SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul'); +SELECT toDateTime64('1111111111.222', 3, 'Asia/Istanbul'); +SELECT toDateTime64('1111111111222', 3, 'Asia/Istanbul'); +SELECT ignore(toDateTime64(1111111111222, 3, 'Asia/Istanbul')); -- This gives somewhat correct but unexpected result diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql index 416638a4a73..bb837c681e3 100644 --- a/tests/queries/0_stateless/01734_datetime64_from_float.sql +++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql @@ -1,3 +1,3 @@ -SELECT CAST(1111111111.222 AS DateTime64(3, 'Europe/Moscow')); -SELECT toDateTime(1111111111.222, 3, 'Europe/Moscow'); -SELECT toDateTime64(1111111111.222, 3, 'Europe/Moscow'); +SELECT CAST(1111111111.222 AS DateTime64(3, 'Asia/Istanbul')); +SELECT toDateTime(1111111111.222, 3, 'Asia/Istanbul'); +SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01761_round_year_bounds.sql b/tests/queries/0_stateless/01761_round_year_bounds.sql index fed12c55568..57f421d155e 100644 --- a/tests/queries/0_stateless/01761_round_year_bounds.sql +++ b/tests/queries/0_stateless/01761_round_year_bounds.sql @@ -1 +1 @@ -SELECT toStartOfInterval(toDateTime(-9223372036854775808), toIntervalYear(100), 'Europe/Moscow') FORMAT Null; +SELECT toStartOfInterval(toDateTime(-9223372036854775808), toIntervalYear(100), 'Asia/Istanbul') FORMAT Null; diff --git a/tests/queries/0_stateless/01769_extended_range_2.sql b/tests/queries/0_stateless/01769_extended_range_2.sql index a2570c9397b..0b1319ddaea 100644 --- a/tests/queries/0_stateless/01769_extended_range_2.sql +++ b/tests/queries/0_stateless/01769_extended_range_2.sql @@ -1,3 +1,3 @@ SELECT toDateTime64('1969-12-31 18:00:12', 0, 'America/Phoenix'); SELECT toDateTime64('1969-12-30 18:00:12', 0, 'America/Phoenix'); -SELECT toDateTime64('1969-12-31 18:00:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('1969-12-31 18:00:12', 0, 'Asia/Istanbul'); diff --git 
a/tests/queries/0_stateless/01772_to_start_of_hour_align.sql b/tests/queries/0_stateless/01772_to_start_of_hour_align.sql index 6d1bb460f90..5dbf6a97e57 100644 --- a/tests/queries/0_stateless/01772_to_start_of_hour_align.sql +++ b/tests/queries/0_stateless/01772_to_start_of_hour_align.sql @@ -13,8 +13,8 @@ SELECT toStartOfInterval(toDateTime('2021-03-23 13:58:00', 'Asia/Kolkata'), INTE -- In case of timezone shifts, rounding is performed to the hour number on "wall clock" time. -- The intervals may become shorter or longer due to time shifts. For example, the three hour interval may actually last two hours. -- If the same hour number on "wall clock" time correspond to multiple time points due to shifting backwards, the unspecified time point is selected among the candidates. -SELECT toDateTime('2010-03-28 00:00:00', 'Europe/Moscow') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); -SELECT toDateTime('2010-10-31 00:00:00', 'Europe/Moscow') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); +SELECT toDateTime('2010-03-28 00:00:00', 'Asia/Istanbul') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); +SELECT toDateTime('2010-10-31 00:00:00', 'Asia/Istanbul') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); -- And this should work even for non whole number of hours shifts. SELECT toDateTime('2020-04-05 00:00:00', 'Australia/Lord_Howe') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); diff --git a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference index 75c114cdd74..bf717d7da0b 100644 --- a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference +++ b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference @@ -1,27 +1,27 @@ -- { echo } -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); 20 -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 21 -SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 22 -- non-zero scale -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 
'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); 20 -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 21 -SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 22 diff --git a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql index e368f45cbda..712afd28cd6 100644 --- a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql +++ b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql @@ -1,16 +1,16 @@ -- { echo } -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); -- non-zero scale -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C'); \ No newline at end of file +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); \ No newline at end of file diff --git a/tests/queries/0_stateless/01802_toDateTime64_large_values.reference b/tests/queries/0_stateless/01802_toDateTime64_large_values.reference index c44c61ab93a..e60b1c30314 100644 --- 
a/tests/queries/0_stateless/01802_toDateTime64_large_values.reference +++ b/tests/queries/0_stateless/01802_toDateTime64_large_values.reference @@ -2,9 +2,9 @@ SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC'); 2205-12-12 12:12:12 -SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'); 2205-12-12 12:12:12 -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); 2205-12-12 12:12:12.000000 -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); 2205-12-12 12:12:12.000000 diff --git a/tests/queries/0_stateless/01802_toDateTime64_large_values.sql b/tests/queries/0_stateless/01802_toDateTime64_large_values.sql index 299111f43bc..d82d4433b2d 100644 --- a/tests/queries/0_stateless/01802_toDateTime64_large_values.sql +++ b/tests/queries/0_stateless/01802_toDateTime64_large_values.sql @@ -1,7 +1,7 @@ -- { echo } SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC'); -SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'); -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); \ No newline at end of file +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); \ No newline at end of file diff --git a/tests/queries/0_stateless/01811_datename.sql b/tests/queries/0_stateless/01811_datename.sql index 0cd538b52c7..b757d9ae018 100644 --- a/tests/queries/0_stateless/01811_datename.sql +++ b/tests/queries/0_stateless/01811_datename.sql @@ -66,7 +66,7 @@ SELECT WITH toDateTime('2021-04-14 23:22:33', 'UTC') as date SELECT - dateName('weekday', date, 'Europe/Moscow'), - dateName('hour', date, 'Europe/Moscow'), - dateName('minute', date, 'Europe/Moscow'), - dateName('second', date, 'Europe/Moscow'); + dateName('weekday', date, 'Asia/Istanbul'), + dateName('hour', date, 'Asia/Istanbul'), + dateName('minute', date, 'Asia/Istanbul'), + dateName('second', date, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql index 377291e015f..5ec767fe413 100644 --- a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql +++ b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql @@ -1,2 +1,2 @@ -SELECT toDateTime('9223372036854775806', 7, 'Europe/Moscow'); -SELECT toDateTime('9223372036854775806', 8, 'Europe/Moscow'); +SELECT toDateTime('9223372036854775806', 7, 'Asia/Istanbul'); +SELECT toDateTime('9223372036854775806', 8, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01852_map_combinator.sql b/tests/queries/0_stateless/01852_map_combinator.sql index 3036e2e0ea4..a23a507bc27 100644 --- a/tests/queries/0_stateless/01852_map_combinator.sql +++ b/tests/queries/0_stateless/01852_map_combinator.sql @@ -26,7 +26,7 @@ select minMap(val) from values ('val Map(String, String)', (map('1', '1')), (ma select minMap(val) from values ('val Map(FixedString(1), FixedString(1))', (map('1', '1')), (map('1', '2'))); select minMap(val) from values ('val Map(UInt64, UInt64)', (map(1, 1)), (map(1, 2))); select minMap(val) from values ('val Map(Date, Int16)', (map(1, 1)), (map(1, 2))); -select minMap(val) from values ('val Map(DateTime(\'Europe/Moscow\'), Int32)', (map(1, 1)), (map(1, 2))); +select minMap(val) 
from values ('val Map(DateTime(\'Asia/Istanbul\'), Int32)', (map(1, 1)), (map(1, 2))); select minMap(val) from values ('val Map(Enum16(\'a\'=1), Int16)', (map('a', 1)), (map('a', 2))); select maxMap(val) from values ('val Map(String, String)', (map('1', '1')), (map('1', '2'))); select minMap(val) from values ('val Map(Int128, Int128)', (map(1, 1)), (map(1, 2))); diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql index 1aea0fb91f2..2f0ed1fdc7f 100644 --- a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql @@ -1,5 +1,5 @@ drop table if exists replacing; -create table replacing( `A` Int64, `D` DateTime64(9, 'Europe/Moscow'), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; +create table replacing( `A` Int64, `D` DateTime64(9, 'Asia/Istanbul'), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; insert into replacing values (1,'1970-01-01 08:25:46.300800000','a'); insert into replacing values (2,'1970-01-01 08:25:46.300800002','b'); diff --git a/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql b/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql index ff3134d37ed..3a49ef73d1a 100644 --- a/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql +++ b/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql @@ -1,2 +1,2 @@ -SELECT n, source FROM (SELECT toDateTime64(number * 1000, 3,'Europe/Moscow') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 3); -SELECT n, source FROM (SELECT toDateTime64(number * 1000, 9,'Europe/Moscow') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 9); +SELECT n, source FROM (SELECT toDateTime64(number * 1000, 3,'Asia/Istanbul') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 3); +SELECT n, source FROM (SELECT toDateTime64(number * 1000, 9,'Asia/Istanbul') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 9); diff --git a/tests/queries/0_stateless/01891_partition_hash.sql b/tests/queries/0_stateless/01891_partition_hash.sql index f401c7c2d07..f56ed6a4ff4 100644 --- a/tests/queries/0_stateless/01891_partition_hash.sql +++ b/tests/queries/0_stateless/01891_partition_hash.sql @@ -1,5 +1,5 @@ drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Europe/Moscow'), dt64 DateTime64(3, 'Europe/Moscow'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); +create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = 
MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. diff --git a/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql b/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql index 643266f1ea3..431f566b806 100644 --- a/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql +++ b/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql @@ -1,7 +1,7 @@ -- Tags: long drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Europe/Moscow'), dt64 DateTime64(3, 'Europe/Moscow'), dec128 Decimal128(3), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, u8, u16, u32, u64, id, s, fs, a, t, d, dt, dt64, dec128, lc) order by tuple(); +create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, u8, u16, u32, u64, id, s, fs, a, t, d, dt, dt64, dec128, lc) order by tuple(); insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. 
diff --git a/tests/queries/0_stateless/01905_to_json_string.sql b/tests/queries/0_stateless/01905_to_json_string.sql index e92c32f3422..38c02ef13fb 100644 --- a/tests/queries/0_stateless/01905_to_json_string.sql +++ b/tests/queries/0_stateless/01905_to_json_string.sql @@ -8,7 +8,7 @@ $$ d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, - g Tuple(Date, DateTime('Europe/Moscow'), DateTime64(3, 'Europe/Moscow'), UUID), + g Tuple(Date, DateTime('Asia/Istanbul'), DateTime64(3, 'Asia/Istanbul'), UUID), h FixedString(2), i Array(Nullable(UUID)) $$, 10, 5, 3) limit 2; diff --git a/tests/queries/0_stateless/01921_datatype_date32.sql b/tests/queries/0_stateless/01921_datatype_date32.sql index e01bdfeee8d..49e5366b455 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.sql +++ b/tests/queries/0_stateless/01921_datatype_date32.sql @@ -23,7 +23,7 @@ select toMinute(x1) from t1; -- { serverError 43 } select '-------toSecond---------'; select toSecond(x1) from t1; -- { serverError 43 } select '-------toStartOfDay---------'; -select toStartOfDay(x1, 'Europe/Moscow') from t1; +select toStartOfDay(x1, 'Asia/Istanbul') from t1; select '-------toMonday---------'; select toMonday(x1) from t1; select '-------toISOWeek---------'; @@ -57,21 +57,21 @@ select toStartOfHour(x1) from t1; -- { serverError 43 } select '-------toStartOfISOYear---------'; select toStartOfISOYear(x1) from t1; select '-------toRelativeYearNum---------'; -select toRelativeYearNum(x1, 'Europe/Moscow') from t1; +select toRelativeYearNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeQuarterNum---------'; -select toRelativeQuarterNum(x1, 'Europe/Moscow') from t1; +select toRelativeQuarterNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeMonthNum---------'; -select toRelativeMonthNum(x1, 'Europe/Moscow') from t1; +select toRelativeMonthNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeWeekNum---------'; -select toRelativeWeekNum(x1, 'Europe/Moscow') from t1; +select toRelativeWeekNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeDayNum---------'; -select toRelativeDayNum(x1, 'Europe/Moscow') from t1; +select toRelativeDayNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeHourNum---------'; -select toRelativeHourNum(x1, 'Europe/Moscow') from t1; +select toRelativeHourNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeMinuteNum---------'; -select toRelativeMinuteNum(x1, 'Europe/Moscow') from t1; +select toRelativeMinuteNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeSecondNum---------'; -select toRelativeSecondNum(x1, 'Europe/Moscow') from t1; +select toRelativeSecondNum(x1, 'Asia/Istanbul') from t1; select '-------toTime---------'; select toTime(x1) from t1; -- { serverError 43 } select '-------toYYYYMM---------'; diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.sql b/tests/queries/0_stateless/01925_date_date_time_comparison.sql index 13e856384d2..0659d85b028 100644 --- a/tests/queries/0_stateless/01925_date_date_time_comparison.sql +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.sql @@ -1,2 +1,2 @@ -SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Europe/Moscow'); -SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Europe/Moscow'); +SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul'); +SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Asia/Istanbul'); diff --git 
a/tests/queries/0_stateless/01926_date_date_time_supertype.reference b/tests/queries/0_stateless/01926_date_date_time_supertype.reference index ec9933dfbd2..e4e8ddfceab 100644 --- a/tests/queries/0_stateless/01926_date_date_time_supertype.reference +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.reference @@ -1,12 +1,12 @@ Array -Array(DateTime(\'Europe/Moscow\')) -Array(DateTime64(5, \'Europe/Moscow\')) -Array(DateTime64(6, \'Europe/Moscow\')) +Array(DateTime(\'Asia/Istanbul\')) +Array(DateTime64(5, \'Asia/Istanbul\')) +Array(DateTime64(6, \'Asia/Istanbul\')) If -2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') -2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +2000-01-01 00:00:00 DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Asia/Istanbul\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Asia/Istanbul\') Cast 2000-01-01 00:00:00 DateTime(\'UTC\') 2000-01-01 00:00:00.00000 DateTime64(5, \'UTC\') diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.sql b/tests/queries/0_stateless/01926_date_date_time_supertype.sql index cce488a5cff..756fd04a01f 100644 --- a/tests/queries/0_stateless/01926_date_date_time_supertype.sql +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.sql @@ -1,8 +1,8 @@ SELECT 'Array'; -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow')]); -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow')]); -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow'), toDateTime64('2000-01-01', 6, 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul'), toDateTime64('2000-01-01', 5, 'Asia/Istanbul')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul'), toDateTime64('2000-01-01', 5, 'Asia/Istanbul'), toDateTime64('2000-01-01', 6, 'Asia/Istanbul')]); DROP TABLE IF EXISTS predicate_table; CREATE TABLE predicate_table (value UInt8) ENGINE=TinyLog; @@ -11,11 +11,11 @@ INSERT INTO predicate_table VALUES (0), (1); SELECT 'If'; -WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 'Europe/Moscow') as b +WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 'Asia/Istanbul') as b SELECT if(value, b, a) as result, toTypeName(result) FROM predicate_table; -WITH toDateTime('2000-01-01', 'Europe/Moscow') as a, toDateTime64('2000-01-01', 5, 'Europe/Moscow') as b +WITH toDateTime('2000-01-01', 'Asia/Istanbul') as a, toDateTime64('2000-01-01', 5, 'Asia/Istanbul') as b SELECT if(value, b, a) as result, toTypeName(result) FROM predicate_table; diff --git a/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql b/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql index 9a25f2b007b..05e5a090d86 100644 --- a/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql +++ b/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql @@ -1 +1 @@ -select toDate32(toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow')), toDateTime64(toDate32('2019-01-01'), 3, 'Europe/Moscow') \ No newline at end of file +select toDate32(toDateTime64('2019-01-01 
00:00:00', 3, 'Asia/Istanbul')), toDateTime64(toDate32('2019-01-01'), 3, 'Asia/Istanbul') \ No newline at end of file diff --git a/tests/queries/0_stateless/02096_date_time_1970_saturation.sql b/tests/queries/0_stateless/02096_date_time_1970_saturation.sql index e0c401443a7..8cd472c4e6c 100644 --- a/tests/queries/0_stateless/02096_date_time_1970_saturation.sql +++ b/tests/queries/0_stateless/02096_date_time_1970_saturation.sql @@ -1,21 +1,21 @@ select toDate(0); -select toDateTime(0, 'Europe/Moscow'); +select toDateTime(0, 'Asia/Istanbul'); select toMonday(toDate(0)); -select toMonday(toDateTime(0, 'Europe/Moscow')); +select toMonday(toDateTime(0, 'Asia/Istanbul')); select toStartOfWeek(toDate(0)); -select toStartOfWeek(toDateTime(0, 'Europe/Moscow')); +select toStartOfWeek(toDateTime(0, 'Asia/Istanbul')); select toStartOfMonth(toDate(0)); -select toStartOfMonth(toDateTime(0, 'Europe/Moscow')); +select toStartOfMonth(toDateTime(0, 'Asia/Istanbul')); select toStartOfQuarter(toDate(0)); -select toStartOfQuarter(toDateTime(0, 'Europe/Moscow')); +select toStartOfQuarter(toDateTime(0, 'Asia/Istanbul')); select toStartOfYear(toDate(0)); -select toStartOfYear(toDateTime(0, 'Europe/Moscow')); -select toTime(toDateTime(0, 'Europe/Moscow')); -select toStartOfMinute(toDateTime(0, 'Europe/Moscow')); -select toStartOfFiveMinute(toDateTime(0, 'Europe/Moscow')); -select toStartOfTenMinutes(toDateTime(0, 'Europe/Moscow')); -select toStartOfFifteenMinutes(toDateTime(0, 'Europe/Moscow')); -select toStartOfHour(toDateTime(0, 'Europe/Moscow')); +select toStartOfYear(toDateTime(0, 'Asia/Istanbul')); +select toTime(toDateTime(0, 'Asia/Istanbul')); +select toStartOfMinute(toDateTime(0, 'Asia/Istanbul')); +select toStartOfFiveMinute(toDateTime(0, 'Asia/Istanbul')); +select toStartOfTenMinutes(toDateTime(0, 'Asia/Istanbul')); +select toStartOfFifteenMinutes(toDateTime(0, 'Asia/Istanbul')); +select toStartOfHour(toDateTime(0, 'Asia/Istanbul')); select toDateTime(0, 'America/Los_Angeles'); select toMonday(toDateTime(0, 'America/Los_Angeles')); diff --git a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql index 7f62e187241..df5499df32a 100644 --- a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql +++ b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql @@ -1,5 +1,5 @@ SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'UTC')); -SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Europe/Moscow')); +SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Asia/Istanbul')); SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Canada/Atlantic')); SELECT toStartOfWeek(toDateTime('1970-01-04 00:00:00')); diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index d129ccc801e..c0463343956 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -38,9 +38,9 @@ SELECT sum(number) FROM numbers3; SHOW CREATE TABLE numbers3; DROP TABLE numbers3; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Europe/Moscow'), UTCEventTime DateTime('UTC')) PARTITION BY EventDate PRIMARY KEY CounterID; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) PARTITION BY EventDate PRIMARY KEY CounterID; SET default_table_engine = 
'Memory'; -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Europe/Moscow')) AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; SHOW CREATE TABLE test_view_filtered; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns b/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns index 794ee47d757..3bf762ed7d5 100644 --- a/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns @@ -1 +1 @@ -`a1` Array(Int8), `a2` Array(UInt8), `a3` Array(Int16), `a4` Array(UInt16), `a5` Array(Int32), `a6` Array(UInt32), `a7` Array(Int64), `a8` Array(UInt64), `a9` Array(String), `a10` Array(FixedString(4)), `a11` Array(Float32), `a12` Array(Float64), `a13` Array(Date), `a14` Array(Datetime('Europe/Moscow')), `a15` Array(Decimal(4, 2)), `a16` Array(Decimal(10, 2)), `a17` Array(Decimal(25, 2)) +`a1` Array(Int8), `a2` Array(UInt8), `a3` Array(Int16), `a4` Array(UInt16), `a5` Array(Int32), `a6` Array(UInt32), `a7` Array(Int64), `a8` Array(UInt64), `a9` Array(String), `a10` Array(FixedString(4)), `a11` Array(Float32), `a12` Array(Float64), `a13` Array(Date), `a14` Array(Datetime('Asia/Istanbul')), `a15` Array(Decimal(4, 2)), `a16` Array(Decimal(10, 2)), `a17` Array(Decimal(25, 2)) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns index df35127ede8..c6d754f04c7 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns @@ -1 +1 @@ -`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime('Europe/Moscow')), `index` Nullable(String), `__index_level_1__` Nullable(DateTime('Europe/Moscow')) +`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime('Asia/Istanbul')), `index` Nullable(String), `__index_level_1__` Nullable(DateTime('Asia/Istanbul')) diff --git a/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py index 92606c9cb26..a1ce8ed7e65 100755 --- a/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py +++ b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py @@ -4,8 +4,8 @@ import json import sys TYPE_PARQUET_CONVERTED_TO_CLICKHOUSE = { - "TIMESTAMP_MICROS": "DateTime('Europe/Moscow')", - "TIMESTAMP_MILLIS": "DateTime('Europe/Moscow')", + "TIMESTAMP_MICROS": "DateTime('Asia/Istanbul')", + "TIMESTAMP_MILLIS": "DateTime('Asia/Istanbul')", "UTF8": "String", } From 5dcd25be23c60bd7472691818a78ceb87a829b2b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 12 May 2022 00:04:54 +0200 Subject: [PATCH 002/150] Initial implementation --- .../registerDiskAzureBlobStorage.cpp | 4 + src/Disks/DiskObjectStorage.cpp | 1228 +++++++++++++++++ src/Disks/DiskObjectStorage.h | 324 +++++ src/Disks/IDisk.h | 3 +- 
src/Disks/IDiskRemote.h | 19 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 4 +- src/Disks/IObjectStorage.cpp | 37 + src/Disks/IObjectStorage.h | 122 ++ src/Disks/S3/DiskS3.cpp | 1055 -------------- src/Disks/S3/DiskS3.h | 189 --- src/Disks/S3/diskSettings.cpp | 127 ++ src/Disks/S3/diskSettings.h | 29 + src/Disks/S3/parseConfig.h | 30 + src/Disks/S3/registerDiskS3.cpp | 166 +-- src/Disks/S3ObjectStorage.cpp | 436 ++++++ src/Disks/S3ObjectStorage.h | 130 ++ src/IO/ReadBufferFromS3.cpp | 2 +- src/IO/ReadBufferFromS3.h | 8 +- src/IO/S3Common.cpp | 6 +- src/IO/S3Common.h | 4 +- src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 7 +- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 3 +- src/Storages/StorageS3.cpp | 9 +- src/Storages/StorageS3.h | 8 +- tests/integration/test_merge_tree_s3/test.py | 5 +- 29 files changed, 2530 insertions(+), 1435 deletions(-) create mode 100644 src/Disks/DiskObjectStorage.cpp create mode 100644 src/Disks/DiskObjectStorage.h create mode 100644 src/Disks/IObjectStorage.cpp create mode 100644 src/Disks/IObjectStorage.h delete mode 100644 src/Disks/S3/DiskS3.cpp delete mode 100644 src/Disks/S3/DiskS3.h create mode 100644 src/Disks/S3/diskSettings.cpp create mode 100644 src/Disks/S3/diskSettings.h create mode 100644 src/Disks/S3/parseConfig.h create mode 100644 src/Disks/S3ObjectStorage.cpp create mode 100644 src/Disks/S3ObjectStorage.h diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 128c7534b3c..8b2429263bb 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -19,6 +19,9 @@ namespace ErrorCodes extern const int PATH_ACCESS_DENIED; } +namespace +{ + constexpr char test_file[] = "test.txt"; constexpr char test_str[] = "test"; constexpr size_t test_str_size = 4; @@ -71,6 +74,7 @@ std::unique_ptr getSettings(const Poco::Util::Abst ); } +} void registerDiskAzureBlobStorage(DiskFactory & factory) { diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp new file mode 100644 index 00000000000..8fbde6dc6ca --- /dev/null +++ b/src/Disks/DiskObjectStorage.cpp @@ -0,0 +1,1228 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DISK_INDEX; + extern const int UNKNOWN_FORMAT; + extern const int FILE_ALREADY_EXISTS; + extern const int PATH_ACCESS_DENIED;; + extern const int FILE_DOESNT_EXIST; + extern const int BAD_FILE_TYPE; + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int SUPPORT_IS_DISABLED; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + return result; +} + + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata 
DiskObjectStorage::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + updater(result); + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + metadata_disk_->removeFile(metadata_file_path_); + + return result; + +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) +{ + if (overwrite || !metadata_disk_->exists(metadata_file_path_)) + { + return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); + } + else + { + auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + if (result.read_only) + throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); + return result; + } +} + +void DiskObjectStorage::Metadata::load() +{ + try + { + const ReadSettings read_settings; + auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ + + UInt32 version; + readIntText(version, *buf); + + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT, + "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", + metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); + + assertChar('\n', *buf); + + UInt32 remote_fs_objects_count; + readIntText(remote_fs_objects_count, *buf); + assertChar('\t', *buf); + readIntText(total_size, *buf); + assertChar('\n', *buf); + remote_fs_objects.resize(remote_fs_objects_count); + + for (size_t i = 0; i < remote_fs_objects_count; ++i) + { + String remote_fs_object_path; + size_t remote_fs_object_size; + readIntText(remote_fs_object_size, *buf); + assertChar('\t', *buf); + readEscapedString(remote_fs_object_path, *buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!remote_fs_object_path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. 
Path: {}, root path: {}, disk path: {}", + remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); + + remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); + } + assertChar('\n', *buf); + remote_fs_objects[i].relative_path = remote_fs_object_path; + remote_fs_objects[i].bytes_size = remote_fs_object_size; + } + + readIntText(ref_count, *buf); + assertChar('\n', *buf); + + if (version >= VERSION_READ_ONLY_FLAG) + { + readBoolText(read_only, *buf); + assertChar('\n', *buf); + } + } + catch (Exception & e) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + throw; + + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + + throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); + } +} + +/// Load metadata by path or create empty if `create` flag is set. +DiskObjectStorage::Metadata::Metadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_) + : remote_fs_root_path(remote_fs_root_path_) + , metadata_file_path(metadata_file_path_) + , metadata_disk(metadata_disk_) + , total_size(0), ref_count(0) +{ +} + +void DiskObjectStorage::Metadata::addObject(const String & path, size_t size) +{ + total_size += size; + remote_fs_objects.emplace_back(path, size); +} + + +void DiskObjectStorage::Metadata::saveToBuffer(WriteBuffer & buf, bool sync) +{ + writeIntText(VERSION_RELATIVE_PATHS, buf); + writeChar('\n', buf); + + writeIntText(remote_fs_objects.size(), buf); + writeChar('\t', buf); + writeIntText(total_size, buf); + writeChar('\n', buf); + + for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) + { + writeIntText(remote_fs_object_size, buf); + writeChar('\t', buf); + writeEscapedString(remote_fs_object_path, buf); + writeChar('\n', buf); + } + + writeIntText(ref_count, buf); + writeChar('\n', buf); + + writeBoolText(read_only, buf); + writeChar('\n', buf); + + buf.finalize(); + if (sync) + buf.sync(); + +} + +/// Fsync metadata file if 'sync' flag is set. 
+void DiskObjectStorage::Metadata::save(bool sync)
+{
+    auto buf = metadata_disk->writeFile(metadata_file_path, 1024);
+    saveToBuffer(*buf, sync);
+}
+
+std::string DiskObjectStorage::Metadata::serializeToString()
+{
+    WriteBufferFromOwnString write_buf;
+    saveToBuffer(write_buf, false);
+    return write_buf.str();
+}
+
+DiskObjectStorage::Metadata DiskObjectStorage::readMetadataUnlocked(const String & path, std::shared_lock<std::shared_mutex> &) const
+{
+    return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path);
+}
+
+
+DiskObjectStorage::Metadata DiskObjectStorage::readMetadata(const String & path) const
+{
+    std::shared_lock lock(metadata_mutex);
+    return readMetadataUnlocked(path, lock);
+}
+
+DiskObjectStorage::Metadata DiskObjectStorage::readUpdateAndStoreMetadata(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
+{
+    std::unique_lock lock(metadata_mutex);
+    return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater);
+}
+
+
+DiskObjectStorage::Metadata DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
+{
+    std::unique_lock lock(metadata_mutex);
+    return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
+}
+
+DiskObjectStorage::Metadata DiskObjectStorage::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, DiskObjectStorage::MetadataUpdater updater)
+{
+    if (mode == WriteMode::Rewrite || !metadata_disk->exists(path))
+    {
+        std::unique_lock lock(metadata_mutex);
+        return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater);
+    }
+    else
+    {
+        return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater);
+    }
+}
+
+DiskObjectStorage::Metadata DiskObjectStorage::createAndStoreMetadata(const String & path, bool sync)
+{
+    return Metadata::createAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync);
+}
+
+DiskObjectStorage::Metadata DiskObjectStorage::createUpdateAndStoreMetadata(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
+{
+    return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater);
+}
+
+std::vector<String> DiskObjectStorage::getRemotePaths(const String & local_path) const
+{
+    auto metadata = readMetadata(local_path);
+
+    std::vector<String> remote_paths;
+    for (const auto & [remote_path, _] : metadata.remote_fs_objects)
+        remote_paths.push_back(fs::path(metadata.remote_fs_root_path) / remote_path);
+
+    return remote_paths;
+
+}
+
+void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector & paths_map)
+{
+    /// Protect against concurrent deletion of files (for example because of a merge).
+ if (metadata_disk->isFile(local_path)) + { + try + { + paths_map.emplace_back(local_path, getRemotePaths(local_path)); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + return; + throw; + } + } + else + { + DiskDirectoryIteratorPtr it; + try + { + it = iterateDirectory(local_path); + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + return; + throw; + } + + for (; it->isValid(); it->next()) + DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); + } +} + +bool DiskObjectStorage::exists(const String & path) const +{ + return metadata_disk->exists(path); +} + + +bool DiskObjectStorage::isFile(const String & path) const +{ + return metadata_disk->isFile(path); +} + + +void DiskObjectStorage::createFile(const String & path) +{ + createAndStoreMetadata(path, false); +} + +size_t DiskObjectStorage::getFileSize(const String & path) const +{ + return readMetadata(path).total_size; +} + +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +{ + if (exists(to_path)) + throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); + + metadata_disk->moveFile(from_path, to_path); +} + +void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) +{ + if (exists(to_path)) + { + const String tmp_path = to_path + ".old"; + moveFile(to_path, tmp_path); + moveFile(from_path, to_path); + removeFile(tmp_path); + } + else + moveFile(from_path, to_path); +} + +void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) +{ + std::vector paths_to_remove; + removeMetadata(path, paths_to_remove); + + if (!delete_metadata_only) + removeFromRemoteFS(paths_to_remove); +} + +void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) +{ + object_storage->removeObjects(paths); +} + +UInt32 DiskObjectStorage::getRefCount(const String & path) const +{ + return readMetadata(path).ref_count; +} + +std::unordered_map DiskObjectStorage::getSerializedMetadata(const std::vector & file_paths) const +{ + std::unordered_map metadatas; + + std::shared_lock lock(metadata_mutex); + + for (const auto & path : file_paths) + { + DiskObjectStorage::Metadata metadata = readMetadataUnlocked(path, lock); + metadata.ref_count = 0; + metadatas[path] = metadata.serializeToString(); + } + + return metadatas; +} + +String DiskObjectStorage::getUniqueId(const String & path) const +{ + LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path); + auto metadata = readMetadata(path); + String id; + if (!metadata.remote_fs_objects.empty()) + id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path; + return id; +} + +bool DiskObjectStorage::checkObjectExists(const String & path) const +{ + return object_storage->exists(path); +} + +bool DiskObjectStorage::checkUniqueId(const String & id) const +{ + return checkObjectExists(id); +} + +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path) +{ + readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); + + /// Create FS hardlink to metadata file. + metadata_disk->createHardLink(src_path, dst_path); + +} + +void DiskObjectStorage::setReadOnly(const String & path) +{ + /// We should store read only flag inside metadata file (instead of using FS flag), + /// because we modify metadata file when create hard-links from it. 
+ readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); +} + + +bool DiskObjectStorage::isDirectory(const String & path) const +{ + return metadata_disk->isDirectory(path); +} + + +void DiskObjectStorage::createDirectory(const String & path) +{ + metadata_disk->createDirectory(path); +} + + +void DiskObjectStorage::createDirectories(const String & path) +{ + metadata_disk->createDirectories(path); +} + + +void DiskObjectStorage::clearDirectory(const String & path) +{ + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + if (isFile(it->path())) + removeFile(it->path()); +} + + +void DiskObjectStorage::removeDirectory(const String & path) +{ + metadata_disk->removeDirectory(path); +} + + +DiskDirectoryIteratorPtr DiskObjectStorage::iterateDirectory(const String & path) +{ + return metadata_disk->iterateDirectory(path); +} + + +void DiskObjectStorage::listFiles(const String & path, std::vector & file_names) +{ + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + file_names.push_back(it->name()); +} + + +void DiskObjectStorage::setLastModified(const String & path, const Poco::Timestamp & timestamp) +{ + metadata_disk->setLastModified(path, timestamp); +} + + +Poco::Timestamp DiskObjectStorage::getLastModified(const String & path) +{ + return metadata_disk->getLastModified(path); +} + +void DiskObjectStorage::removeMetadata(const String & path, std::vector & paths_to_remove) +{ + LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); + + if (!metadata_disk->exists(path)) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path); + + if (!metadata_disk->isFile(path)) + throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path); + + try + { + auto metadata_updater = [&paths_to_remove, this] (Metadata & metadata) + { + if (metadata.ref_count == 0) + { + for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) + { + paths_to_remove.push_back(fs::path(remote_fs_root_path) / remote_fs_object_path); + object_storage->removeFromCache(fs::path(remote_fs_root_path) / remote_fs_object_path); + } + + return false; + } + else /// In other case decrement number of references, save metadata and delete hardlink. + { + --metadata.ref_count; + } + + return true; + }; + + readUpdateStoreMetadataAndRemove(path, false, metadata_updater); + /// If there is no references - delete content from remote FS. + } + catch (const Exception & e) + { + /// If it's impossible to read meta - just remove it from FS. + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + { + LOG_WARNING(log, + "Metadata file {} can't be read by reason: {}. Removing it forcibly.", + backQuote(path), e.nested() ? e.nested()->message() : e.message()); + metadata_disk->removeFile(path); + } + else + throw; + } +} + + +void DiskObjectStorage::removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. 
+ + if (metadata_disk->isFile(path)) + { + removeMetadata(path, paths_to_remove[path]); + } + else + { + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + removeMetadataRecursive(it->path(), paths_to_remove); + + metadata_disk->removeDirectory(path); + } +} + + +void DiskObjectStorage::shutdown() +{ + object_storage->shutdown(); +} + +void DiskObjectStorage::startup() +{ + + LOG_INFO(log, "Starting up disk {}", name); + object_storage->startup(); + + if (send_metadata) + { + metadata_helper->restore(); + + if (metadata_helper->readSchemaVersion(remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) + metadata_helper->migrateToRestorableSchema(); + + metadata_helper->findLastRevision(); + } + + LOG_INFO(log, "Disk {} started up", name); +} + +ReservationPtr DiskObjectStorage::reserve(UInt64 bytes) +{ + if (!tryReserve(bytes)) + return {}; + + return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); +} + +void DiskObjectStorage::removeSharedFileIfExists(const String & path, bool delete_metadata_only) +{ + std::vector paths_to_remove; + if (metadata_disk->exists(path)) + { + removeMetadata(path, paths_to_remove); + if (!delete_metadata_only) + removeFromRemoteFS(paths_to_remove); + } +} + +void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) +{ + std::unordered_map> paths_to_remove; + removeMetadataRecursive(path, paths_to_remove); + + if (!keep_all_batch_data) + { + std::vector remove_from_remote; + for (auto && [local_path, remote_paths] : paths_to_remove) + { + if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) + remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); + } + removeFromRemoteFS(remove_from_remote); + } +} + +bool DiskObjectStorage::tryReserve(UInt64 bytes) +{ + std::lock_guard lock(reservation_mutex); + if (bytes == 0) + { + LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); + ++reservation_count; + return true; + } + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (unreserved_space >= bytes) + { + LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", + ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); + ++reservation_count; + reserved_bytes += bytes; + return true; + } + return false; +} + +std::unique_ptr DiskObjectStorage::readFile( + const String & path, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const +{ + auto metadata = readMetadata(path); + return object_storage->readObjects(remote_fs_root_path, metadata.remote_fs_objects, settings, read_hint, file_size); +} + +std::unique_ptr DiskObjectStorage::writeFile( + const String & path, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) +{ + auto blob_name = getRandomASCIIString(); + + auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) + { + readOrCreateUpdateAndStoreMetadata(path, mode, false, + [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); + }; + + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, {}, create_metadata_callback, buf_size, settings); +} + + +void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const 
String &, const DisksMap &) +{ + object_storage->applyNewSettings(config, "storage_configuration.disks." + name, context_); +} + +DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const +{ + if (i != 0) + throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + return disk; +} + +void DiskObjectStorageReservation::update(UInt64 new_size) +{ + std::lock_guard lock(disk->reservation_mutex); + disk->reserved_bytes -= size; + size = new_size; + disk->reserved_bytes += size; +} + +DiskObjectStorageReservation::~DiskObjectStorageReservation() +{ + try + { + std::lock_guard lock(disk->reservation_mutex); + if (disk->reserved_bytes < size) + { + disk->reserved_bytes = 0; + LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); + } + else + { + disk->reserved_bytes -= size; + } + + if (disk->reservation_count == 0) + LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); + else + --disk->reservation_count; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + +void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const +{ + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + auto buf = disk->object_storage->writeObject(path, metadata); + buf->write('0'); + buf->finalize(); +} + +void DiskObjectStorageMetadataHelper::findLastRevision() +{ + /// Construct revision number from high to low bits. + String revision; + revision.reserve(64); + for (int bit = 0; bit < 64; ++bit) + { + auto revision_prefix = revision + "1"; + + LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); + + /// Check file or operation with such revision prefix exists. 
+ if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) + || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) + revision += "1"; + else + revision += "0"; + } + revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); + LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); +} + +int DiskObjectStorageMetadataHelper::readSchemaVersion(const String & source_path) const +{ + const std::string path = source_path + SCHEMA_VERSION_OBJECT; + int version = 0; + if (!disk->object_storage->exists(path)) + return version; + + auto buf = disk->object_storage->readObject(path); + readIntText(version, *buf); + + return version; +} + +void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const +{ + auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; + + auto buf = disk->object_storage->writeObject(path); + writeIntText(version, *buf); + buf->finalize(); + +} + +void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const +{ + disk->object_storage->copyObject(key, key, metadata); +} + +void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const +{ + LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); + + auto meta = disk->readMetadata(path); + + for (const auto & [key, _] : meta.remote_fs_objects) + { + ObjectAttributes metadata { + {"path", path} + }; + updateObjectMetadata(disk->remote_fs_root_path + key, metadata); + } +} +void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. + + LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); + + bool dir_contains_only_files = true; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + if (disk->isDirectory(it->path())) + { + dir_contains_only_files = false; + break; + } + + /// The whole directory can be migrated asynchronously. 
+ if (dir_contains_only_files)
+ {
+ auto result = disk->getExecutor().execute([this, path]
+ {
+ for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
+ migrateFileToRestorableSchema(it->path());
+ });
+
+ results.push_back(std::move(result));
+ }
+ else
+ {
+ for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
+ if (!disk->isDirectory(it->path()))
+ {
+ auto source_path = it->path();
+ auto result = disk->getExecutor().execute([this, source_path]
+ {
+ migrateFileToRestorableSchema(source_path);
+ });
+
+ results.push_back(std::move(result));
+ }
+ else
+ migrateToRestorableSchemaRecursive(it->path(), results);
+ }
+
+}
+
+void DiskObjectStorageMetadataHelper::migrateToRestorableSchema()
+{
+ try
+ {
+ LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name);
+
+ Futures results;
+
+ for (const auto & root : data_roots)
+ if (disk->exists(root))
+ migrateToRestorableSchemaRecursive(root + '/', results);
+
+ for (auto & result : results)
+ result.wait();
+ for (auto & result : results)
+ result.get();
+
+ saveSchemaVersion(RESTORABLE_SCHEMA_VERSION);
+ }
+ catch (const Exception &)
+ {
+ tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name));
+
+ throw;
+ }
+}
+
+void DiskObjectStorageMetadataHelper::restore()
+{
+ if (!disk->exists(RESTORE_FILE_NAME))
+ return;
+
+ try
+ {
+ RestoreInformation information;
+ information.source_path = disk->remote_fs_root_path;
+
+ readRestoreInformation(information);
+ if (information.revision == 0)
+ information.revision = LATEST_REVISION;
+ if (!information.source_path.ends_with('/'))
+ information.source_path += '/';
+
+ /// When restoring to the same path we additionally need to clean up objects with a later revision from S3.
+ /// Otherwise it is simply a restore to a different path.
+ if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION)
+ throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS);
+
+ /// This case complicates S3 cleanup in case of unsuccessful restore.
+ if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path))
+ throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS);
+
+ LOG_INFO(disk->log, "Starting to restore disk {}.
Revision: {}, Source path: {}", + disk->name, information.revision, information.source_path); + + if (readSchemaVersion(information.source_path) < RESTORABLE_SCHEMA_VERSION) + throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); + + LOG_INFO(disk->log, "Removing old metadata..."); + + bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; + for (const auto & root : data_roots) + if (disk->exists(root)) + disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + + restoreFiles(information); + restoreFileOperations(information); + + disk->metadata_disk->removeFile(RESTORE_FILE_NAME); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + + LOG_INFO(disk->log, "Restore disk {} finished", disk->name); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) +{ + auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); + buffer->next(); + + try + { + std::map properties; + + while (buffer->hasPendingData()) + { + String property; + readText(property, *buffer); + assertChar('\n', *buffer); + + auto pos = property.find('='); + if (pos == std::string::npos || pos == 0 || pos == property.length()) + throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); + + auto key = property.substr(0, pos); + auto value = property.substr(pos + 1); + + auto it = properties.find(key); + if (it != properties.end()) + throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + + properties[key] = value; + } + + for (const auto & [key, value] : properties) + { + ReadBufferFromString value_buffer (value); + + if (key == "revision") + readIntText(restore_information.revision, value_buffer); + else if (key == "source_path") + readText(restore_information.source_path, value_buffer); + else if (key == "detached") + readBoolTextWord(restore_information.detached, value_buffer); + else + throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + } + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, "Failed to read restore information"); + throw; + } +} + +static String shrinkKey(const String & path, const String & key) +{ + if (!key.starts_with(path)) + throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); + + return key.substr(path.length()); +} + +static std::tuple extractRevisionAndOperationFromKey(const String & key) +{ + String revision_str; + String operation; + /// Key has format: ../../r{revision}-{operation} + static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + + re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + + return {(revision_str.empty() ? 0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; +} + +void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & restore_information) +{ + LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); + + std::vector> results; + auto restore_files = [this, &restore_information, &results](const BlobsPathToSize & keys) + { + std::vector keys_names; + for (const auto & [key, size] : keys) + { + /// Skip file operations objects. They will be processed separately. 
+ if (key.find("/operations/") != String::npos) + continue; + + const auto [revision, _] = extractRevisionAndOperationFromKey(key); + /// Filter early if it's possible to get revision from key. + if (revision > restore_information.revision) + continue; + + keys_names.push_back(key); + } + + if (!keys_names.empty()) + { + auto result = disk->getExecutor().execute([this, &restore_information, keys_names]() + { + processRestoreFiles(restore_information.source_path, keys_names); + }); + + results.push_back(std::move(result)); + } + + return true; + }; + + BlobsPathToSize children; + disk->object_storage->listPrefix(restore_information.source_path, children); + restore_files(children); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); + +} + +void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_path, std::vector keys) +{ + for (const auto & key : keys) + { + auto meta = disk->object_storage->getObjectMetadata(key); + auto object_attributes = meta.attributes; + + String path; + if (object_attributes.has_value()) + { + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes->find("path"); + if (path_entry == object_attributes->end()) + { + /// Such keys can remain after migration, we can skip them. + LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); + continue; + } + + path = path_entry->second; + } + else + continue; + + + disk->createDirectories(directoryPath(path)); + auto relative_key = shrinkKey(source_path, key); + + /// Copy object if we restore to different bucket / path. + if (disk->remote_fs_root_path != source_path) + disk->object_storage->copyObject(key, disk->remote_fs_root_path + relative_key); + + auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) + { + metadata.addObject(relative_key, meta.size_bytes); + return true; + }; + + disk->createUpdateAndStoreMetadata(path, false, updater); + + LOG_TRACE(disk->log, "Restored file {}", path); + } + +} + +static String pathToDetached(const String & source_path) +{ + if (source_path.ends_with('/')) + return fs::path(source_path).parent_path().parent_path() / "detached/"; + return fs::path(source_path).parent_path() / "detached/"; +} + +void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInformation & restore_information) +{ + /// Enable recording file operations if we restore to different bucket / path. + bool send_metadata = disk->remote_fs_root_path != restore_information.source_path; + + std::set renames; + auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + { + const String rename = "rename"; + const String hardlink = "hardlink"; + + for (const auto & [key, _]: keys) + { + const auto [revision, operation] = extractRevisionAndOperationFromKey(key); + if (revision == UNKNOWN_REVISION) + { + LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); + continue; + } + + /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). + /// We can stop processing if revision of the object is already more than required. + if (revision > restore_information.revision) + return false; + + /// Keep original revision if restore to different bucket / path. 
+ if (send_metadata) + revision_counter = revision - 1; + + auto object_attributes = *(disk->object_storage->getObjectMetadata(key).attributes); + if (operation == rename) + { + auto from_path = object_attributes["from_path"]; + auto to_path = object_attributes["to_path"]; + if (disk->exists(from_path)) + { + disk->moveFile(from_path, to_path); + if (send_metadata) + { + auto next_revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + createFileOperationObject("rename", next_revision, object_attributes); + } + + LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); + + if (restore_information.detached && disk->isDirectory(to_path)) + { + /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. + if (!from_path.ends_with('/')) + from_path += '/'; + if (!to_path.ends_with('/')) + to_path += '/'; + + /// Always keep latest actual directory path to avoid 'detaching' not existing paths. + auto it = renames.find(from_path); + if (it != renames.end()) + renames.erase(it); + + renames.insert(to_path); + } + } + } + else if (operation == hardlink) + { + auto src_path = object_attributes["src_path"]; + auto dst_path = object_attributes["dst_path"]; + if (disk->exists(src_path)) + { + disk->createDirectories(directoryPath(dst_path)); + if (send_metadata && !dst_path.starts_with("shadow/")) + { + auto next_revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"src_path", src_path}, + {"dst_path", dst_path} + }; + createFileOperationObject("hardlink", next_revision, object_attributes); + } + disk->createHardLink(src_path, dst_path); + LOG_TRACE(disk->log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); + } + } + } + + return true; + }; + + BlobsPathToSize children; + disk->object_storage->listPrefix(restore_information.source_path + "operations/", children); + restore_file_operations(children); + + if (restore_information.detached) + { + Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; + + for (const auto & path : renames) + { + /// Skip already detached parts. + if (path.find("/detached/") != std::string::npos) + continue; + + /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. 
+ fs::path directory_path(path); + auto directory_name = directory_path.parent_path().filename().string(); + + auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; + if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) + continue; + + auto detached_path = pathToDetached(path); + + LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); + + fs::path from_path = fs::path(path); + fs::path to_path = fs::path(detached_path); + if (path.ends_with('/')) + to_path /= from_path.parent_path().filename(); + else + to_path /= from_path.filename(); + + /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename + if (disk->metadata_disk->exists(to_path)) + disk->metadata_disk->removeRecursive(to_path); + + disk->createDirectories(directoryPath(to_path)); + disk->metadata_disk->moveDirectory(from_path, to_path); + } + } + + LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); +} + + +} diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h new file mode 100644 index 00000000000..2147f9527d5 --- /dev/null +++ b/src/Disks/DiskObjectStorage.h @@ -0,0 +1,324 @@ +#pragma once + +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric DiskSpaceReservedForMerge; +} + +namespace DB +{ + +class DiskObjectStorageMetadataHelper; + +class DiskObjectStorage : public IDisk +{ + +friend class DiskObjectStorageReservation; +friend class DiskObjectStorageMetadataHelper; + +public: + DiskObjectStorage( + const String & name_, + const String & remote_fs_root_path_, + const String & log_name, + DiskPtr metadata_disk_, + ObjectStoragePtr && object_storage_, + DiskType disk_type_, + bool send_metadata_) + : name(name_) + , remote_fs_root_path(remote_fs_root_path_) + , log (&Poco::Logger::get(log_name)) + , metadata_disk(metadata_disk_) + , disk_type(disk_type_) + , object_storage(std::move(object_storage_)) + , send_metadata(send_metadata_) + , metadata_helper(std::make_unique(this, ReadSettings{})) + {} + + DiskType getType() const override { return disk_type; } + + bool supportZeroCopyReplication() const override { return true; } + + bool supportParallelWrite() const override { return true; } + + struct Metadata; + using MetadataUpdater = std::function; + + const String & getName() const final override { return name; } + + const String & getPath() const final override { return metadata_disk->getPath(); } + + std::vector getRemotePaths(const String & local_path) const final override; + + void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + + /// Methods for working with metadata. For some operations (like hardlink + /// creation) metadata can be updated concurrently from multiple threads + /// (file actually rewritten on disk). So additional RW lock is required for + /// metadata read and write, but not for create new metadata. 
+ Metadata readMetadata(const String & path) const;
+ Metadata readMetadataUnlocked(const String & path, std::shared_lock &) const;
+ Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater);
+ Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
+
+ Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater);
+
+ Metadata createAndStoreMetadata(const String & path, bool sync);
+ Metadata createUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater);
+
+ UInt64 getTotalSpace() const override { return std::numeric_limits::max(); }
+
+ UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); }
+
+ UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); }
+
+ UInt64 getKeepingFreeSpace() const override { return 0; }
+
+ bool exists(const String & path) const override;
+
+ bool isFile(const String & path) const override;
+
+ void createFile(const String & path) override;
+
+ size_t getFileSize(const String & path) const override;
+
+ void moveFile(const String & from_path, const String & to_path) override;
+
+ void replaceFile(const String & from_path, const String & to_path) override;
+
+ void removeFile(const String & path) override { removeSharedFile(path, false); }
+
+ void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); }
+
+ void removeRecursive(const String & path) override { removeSharedRecursive(path, false, {}); }
+
+ void removeSharedFile(const String & path, bool delete_metadata_only) override;
+
+ void removeSharedFileIfExists(const String & path, bool delete_metadata_only) override;
+
+ void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
+
+ void removeFromRemoteFS(const std::vector & paths);
+
+ DiskPtr getMetadataDiskIfExistsOrSelf() override { return metadata_disk; }
+
+ UInt32 getRefCount(const String & path) const override;
+
+ /// Return metadata for each file path. Also, before serialization reset
+ /// ref_count for each metadata to zero. This function is used only for remote
+ /// fetches/sends in replicated engines. That's why we reset ref_count to zero.
+ std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; + + String getUniqueId(const String & path) const override; + + bool checkObjectExists(const String & path) const; + bool checkUniqueId(const String & id) const override; + + void createHardLink(const String & src_path, const String & dst_path) override; + + void listFiles(const String & path, std::vector & file_names) override; + + void setReadOnly(const String & path) override; + + bool isDirectory(const String & path) const override; + + void createDirectory(const String & path) override; + + void createDirectories(const String & path) override; + + void clearDirectory(const String & path) override; + + void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } + + void removeDirectory(const String & path) override; + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; + + Poco::Timestamp getLastModified(const String & path) override; + + bool isRemote() const override { return true; } + + void shutdown() override; + + void startup() override; + + ReservationPtr reserve(UInt64 bytes) override; + + std::unique_ptr readFile( + const String & path, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const override; + + std::unique_ptr writeFile( + const String & path, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; + +private: + const String name; + const String remote_fs_root_path; + Poco::Logger * log; + DiskPtr metadata_disk; + + const DiskType disk_type; + ObjectStoragePtr object_storage; + + UInt64 reserved_bytes = 0; + UInt64 reservation_count = 0; + std::mutex reservation_mutex; + + mutable std::shared_mutex metadata_mutex; + void removeMetadata(const String & path, std::vector & paths_to_remove); + + void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); + + bool tryReserve(UInt64 bytes); + + bool send_metadata; + + std::unique_ptr metadata_helper; +}; + +struct DiskObjectStorage::Metadata +{ + using Updater = std::function; + /// Metadata file version. + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; + static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; + + /// Remote FS objects paths and their sizes. + std::vector remote_fs_objects; + + /// URI + const String & remote_fs_root_path; + + /// Relative path to metadata file on local FS. + const String metadata_file_path; + + DiskPtr metadata_disk; + + /// Total size of all remote FS (S3, HDFS) objects. + size_t total_size = 0; + + /// Number of references (hardlinks) to this metadata file. + /// + /// FIXME: Why we are tracking it explicetly, without + /// info from filesystem???? + UInt32 ref_count = 0; + + /// Flag indicates that file is read only. 
+ bool read_only = false; + + Metadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_); + + void addObject(const String & path, size_t size); + + static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); + static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + + static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); + static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); + + /// Serialize metadata to string (very same with saveToBuffer) + std::string serializeToString(); + +private: + /// Fsync metadata file if 'sync' flag is set. + void save(bool sync = false); + void saveToBuffer(WriteBuffer & buffer, bool sync); + void load(); +}; + +class DiskObjectStorageReservation final : public IReservation +{ +public: + DiskObjectStorageReservation(const std::shared_ptr & disk_, UInt64 size_) + : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + { + } + + UInt64 getSize() const override { return size; } + + DiskPtr getDisk(size_t i) const override; + + Disks getDisks() const override { return {disk}; } + + void update(UInt64 new_size) override; + + ~DiskObjectStorageReservation() override; + +private: + std::shared_ptr disk; + UInt64 size; + CurrentMetrics::Increment metric_increment; +}; + +class DiskObjectStorageMetadataHelper +{ +public: + static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); + static constexpr UInt64 UNKNOWN_REVISION = 0; + + DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) + : disk(disk_) + , read_settings(std::move(read_settings_)) + { + } + + struct RestoreInformation + { + UInt64 revision = LATEST_REVISION; + String source_path; + bool detached = false; + }; + + using Futures = std::vector>; + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + void findLastRevision(); + + int readSchemaVersion(const String & source_path) const; + void saveSchemaVersion(const int & version) const; + void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; + void migrateFileToRestorableSchema(const String & path) const; + void migrateToRestorableSchemaRecursive(const String & path, Futures & results); + void migrateToRestorableSchema(); + + void restore(); + void readRestoreInformation(RestoreInformation & restore_information); + void restoreFiles(const RestoreInformation & restore_information); + void processRestoreFiles(const String & source_path, std::vector keys); + void restoreFileOperations(const RestoreInformation & restore_information); + + std::atomic revision_counter = 0; + inline static const String RESTORE_FILE_NAME = "restore"; + + /// Object contains information about schema version. 
+ inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION";
+ /// Version with possibility to backup-restore metadata.
+ static constexpr int RESTORABLE_SCHEMA_VERSION = 1;
+ /// Directories with data.
+ const std::vector data_roots {"data", "store"};
+
+ DiskObjectStorage * disk;
+
+ ReadSettings read_settings;
+};
+
+}
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index c4578d51b6e..1071e1294b6 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -289,7 +290,7 @@ public:
 virtual bool isReadOnly() const { return false; }
- /// Check if disk is broken. Broken disks will have 0 space and not be used.
+ /// Check if disk is broken. Broken disks will have 0 space and cannot be used.
 virtual bool isBroken() const { return false; }
 /// Invoked when Global Context is shutdown.
diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h
index 65bcdf3e719..327452c0bbf 100644
--- a/src/Disks/IDiskRemote.h
+++ b/src/Disks/IDiskRemote.h
@@ -21,24 +21,6 @@ namespace CurrentMetrics
 namespace DB
 {
-/// Path to blob with it's size
-struct BlobPathWithSize
-{
- std::string relative_path;
- uint64_t bytes_size;
-
- BlobPathWithSize() = default;
- BlobPathWithSize(const BlobPathWithSize & other) = default;
-
- BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_)
- : relative_path(relative_path_)
- , bytes_size(bytes_size_)
- {}
-};
-
-/// List of blobs with their sizes
-using BlobsPathToSize = std::vector;
-
 class IAsynchronousReader;
 using AsynchronousReaderPtr = std::shared_ptr;
@@ -153,6 +135,7 @@ public:
 virtual void removeFromRemoteFS(const std::vector & paths) = 0;
 static AsynchronousReaderPtr getThreadPoolReader();
+ static ThreadPool & getThreadPoolWriter();
 DiskPtr getMetadataDiskIfExistsOrSelf() override { return metadata_disk; }
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
index da2d1dee4b2..f0beaab67cf 100644
--- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
@@ -98,7 +98,7 @@ class ReadBufferFromS3Gather final : public ReadBufferFromRemoteFSGather
 {
 public:
 ReadBufferFromS3Gather(
- std::shared_ptr client_ptr_,
+ std::shared_ptr client_ptr_,
 const String & bucket_,
 const String & version_id_,
 const std::string & common_path_prefix_,
@@ -116,7 +116,7 @@ public:
 SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override;
 private:
- std::shared_ptr client_ptr;
+ std::shared_ptr client_ptr;
 String bucket;
 String version_id;
 UInt64 max_single_read_retries;
diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp
new file mode 100644
index 00000000000..ac8f3fc39e8
--- /dev/null
+++ b/src/Disks/IObjectStorage.cpp
@@ -0,0 +1,37 @@
+#include
+#include
+
+namespace DB
+{
+AsynchronousReaderPtr IObjectStorage::getThreadPoolReader()
+{
+ constexpr size_t pool_size = 50;
+ constexpr size_t queue_size = 1000000;
+ static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size);
+ return reader;
+}
+
+ThreadPool & IObjectStorage::getThreadPoolWriter()
+{
+ constexpr size_t pool_size = 100;
+ constexpr size_t queue_size = 1000000;
+ static ThreadPool writer(pool_size, pool_size, queue_size);
+ return writer;
+}
+
+
+std::string IObjectStorage::getCacheBasePath() const
+{
+ return cache ?
cache->getBasePath() : "";
+}
+
+void IObjectStorage::removeFromCache(const std::string & path)
+{
+ if (cache)
+ {
+ auto key = cache->hash(path);
+ cache->remove(key);
+ }
+}
+
+}
diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h
new file mode 100644
index 00000000000..f2cc9b90294
--- /dev/null
+++ b/src/Disks/IObjectStorage.h
@@ -0,0 +1,122 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class ReadBufferFromFileBase;
+class WriteBufferFromFileBase;
+
+using ObjectAttributes = std::map;
+
+/// Path to blob with its size
+struct BlobPathWithSize
+{
+ std::string relative_path;
+ uint64_t bytes_size;
+
+ BlobPathWithSize() = default;
+ BlobPathWithSize(const BlobPathWithSize & other) = default;
+
+ BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_)
+ : relative_path(relative_path_)
+ , bytes_size(bytes_size_)
+ {}
+};
+
+/// List of blobs with their sizes
+using BlobsPathToSize = std::vector;
+
+struct ObjectMetadata
+{
+ uint64_t size_bytes;
+ std::optional last_modified;
+ std::optional attributes;
+};
+
+using FinalizeCallback = std::function;
+
+class IObjectStorage
+{
+public:
+ explicit IObjectStorage(FileCachePtr && cache_)
+ : cache(std::move(cache_))
+ {}
+
+ virtual bool exists(const std::string & path) const = 0;
+
+ virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0;
+
+ virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0;
+
+ virtual std::unique_ptr readObject( /// NOLINT
+ const std::string & path,
+ const ReadSettings & read_settings = ReadSettings{},
+ std::optional read_hint = {},
+ std::optional file_size = {}) const = 0;
+
+ virtual std::unique_ptr readObjects( /// NOLINT
+ const std::string & common_path_prefix,
+ const BlobsPathToSize & blobs_to_read,
+ const ReadSettings & read_settings = ReadSettings{},
+ std::optional read_hint = {},
+ std::optional file_size = {}) const = 0;
+
+ /// Open the file for write and return WriteBufferFromFileBase object.
+ virtual std::unique_ptr writeObject( /// NOLINT
+ const std::string & path,
+ std::optional attributes = {},
+ FinalizeCallback && finalize_callback = {},
+ size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+ const WriteSettings & write_settings = {}) = 0;
+
+ /// Remove file. Throws an exception if the file doesn't exist or it's a directory.
+ virtual void removeObject(const std::string & path) = 0;
+
+ virtual void removeObjects(const std::vector & paths) = 0;
+
+ /// Remove file if it exists.
+ virtual void removeObjectIfExists(const std::string & path) = 0; + + virtual void removeObjectsIfExist(const std::vector & paths) = 0; + + virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + + virtual ~IObjectStorage() = default; + + std::string getCacheBasePath() const; + + static AsynchronousReaderPtr getThreadPoolReader(); + + static ThreadPool & getThreadPoolWriter(); + + virtual void shutdown() = 0; + + virtual void startup() = 0; + + void removeFromCache(const std::string & path); + + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + +protected: + FileCachePtr cache; +}; + +using ObjectStoragePtr = std::unique_ptr; + +} diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp deleted file mode 100644 index 5e3d7031350..00000000000 --- a/src/Disks/S3/DiskS3.cpp +++ /dev/null @@ -1,1055 +0,0 @@ -#include "DiskS3.h" - -#if USE_AWS_S3 -#include "Disks/DiskFactory.h" - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; - extern const int FILE_ALREADY_EXISTS; - extern const int UNKNOWN_FORMAT; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; -} - -template -void throwIfError(Aws::Utils::Outcome & response) -{ - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); - } -} - -template -void throwIfError(const Aws::Utils::Outcome & response) -{ - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); - } -} -template -void logIfError(Aws::Utils::Outcome & response, Fn auto && msg) -{ - try - { - throwIfError(response); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, msg()); - } -} - -template -void logIfError(const Aws::Utils::Outcome & response, Fn auto && msg) -{ - try - { - throwIfError(response); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__, msg()); - } -} - -DiskS3::DiskS3( - String name_, - String bucket_, - String s3_root_path_, - String version_id_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - ContextPtr context_, - SettingsPtr settings_, - GetDiskSettings settings_getter_) - : IDiskRemote(name_, s3_root_path_, metadata_disk_, std::move(cache_), "DiskS3", settings_->thread_pool_size) - , bucket(std::move(bucket_)) - , version_id(std::move(version_id_)) - , current_settings(std::move(settings_)) - , settings_getter(settings_getter_) - , context(context_) -{ -} - -void DiskS3::removeFromRemoteFS(const std::vector & paths) -{ - auto settings = current_settings.get(); - - size_t chunk_size_limit = settings->objects_chunk_size_to_delete; - size_t current_position = 0; - while (current_position < paths.size()) - { - std::vector current_chunk; - String keys; - for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(paths[current_position]); - current_chunk.push_back(obj); - - if (!keys.empty()) - keys += ", "; - keys += paths[current_position]; - } - - LOG_TRACE(log, "Remove AWS keys {}", keys); - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(current_chunk); - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = settings->client->DeleteObjects(request); - logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); - } -} - -void DiskS3::moveFile(const String & from_path, const String & to_path) -{ - auto settings = current_settings.get(); - - moveFile(from_path, to_path, settings->send_metadata); -} - -void DiskS3::moveFile(const String & from_path, const String & to_path, bool send_metadata) -{ - if (exists(to_path)) - throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); - - if (send_metadata) - { - auto revision = ++revision_counter; - const ObjectMetadata object_metadata { - {"from_path", from_path}, - {"to_path", to_path} - }; - createFileOperationObject("rename", revision, object_metadata); - } - metadata_disk->moveFile(from_path, to_path); -} - -std::unique_ptr DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional, std::optional) const -{ - auto settings = current_settings.get(); - auto metadata = readMetadata(path); - - LOG_TEST(log, "Read from file by path: {}. 
Existing S3 objects: {}", - backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - - ReadSettings disk_read_settings{read_settings}; - if (cache) - { - if (IFileCache::isReadOnly()) - disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; - - disk_read_settings.remote_fs_cache = cache; - } - - auto s3_impl = std::make_unique( - settings->client, bucket, version_id, metadata.remote_fs_root_path, metadata.remote_fs_objects, - settings->s3_settings.max_single_read_retries, disk_read_settings); - - if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); - } - else - { - auto buf = std::make_unique(std::move(s3_impl)); - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); - } -} - -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & write_settings) -{ - auto settings = current_settings.get(); - - /// Path to store new S3 object. - auto blob_name = getRandomASCIIString(); - - std::optional object_metadata; - if (settings->send_metadata) - { - auto revision = ++revision_counter; - object_metadata = { - {"path", path} - }; - blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; - } - - LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", - mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - - bool cache_on_write = cache - && fs::path(path).extension() != ".tmp" - && write_settings.enable_filesystem_cache_on_write_operations - && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; - - auto s3_buffer = std::make_unique( - settings->client, - bucket, - fs::path(remote_fs_root_path) / blob_name, - settings->s3_settings, - std::move(object_metadata), - buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), cache_on_write ? cache : nullptr); - - auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); - }; - - return std::make_unique(std::move(s3_buffer), std::move(create_metadata_callback), fs::path(remote_fs_root_path) / blob_name); -} - -void DiskS3::createHardLink(const String & src_path, const String & dst_path) -{ - auto settings = current_settings.get(); - createHardLink(src_path, dst_path, settings->send_metadata); -} - -void DiskS3::createHardLink(const String & src_path, const String & dst_path, bool send_metadata) -{ - /// We don't need to record hardlinks created to shadow folder. - if (send_metadata && !dst_path.starts_with("shadow/")) - { - auto revision = ++revision_counter; - const ObjectMetadata object_metadata { - {"src_path", src_path}, - {"dst_path", dst_path} - }; - createFileOperationObject("hardlink", revision, object_metadata); - } - - IDiskRemote::createHardLink(src_path, dst_path); -} - -void DiskS3::shutdown() -{ - auto settings = current_settings.get(); - /// This call stops any next retry attempts for ongoing S3 requests. - /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. - /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. 
- /// This should significantly speed up shutdown process if S3 is unhealthy. - settings->client->DisableRequestProcessing(); -} - -void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata) -{ - auto settings = current_settings.get(); - const String key = "operations/r" + revisionToString(revision) + "-" + operation_name; - WriteBufferFromS3 buffer( - settings->client, - bucket, - remote_fs_root_path + key, - settings->s3_settings, - metadata); - - buffer.write('0'); - buffer.finalize(); -} - -void DiskS3::startup() -{ - auto settings = current_settings.get(); - - /// Need to be enabled if it was disabled during shutdown() call. - settings->client->EnableRequestProcessing(); - - if (!settings->send_metadata) - return; - - LOG_INFO(log, "Starting up disk {}", name); - - restore(); - - if (readSchemaVersion(bucket, remote_fs_root_path) < RESTORABLE_SCHEMA_VERSION) - migrateToRestorableSchema(); - - findLastRevision(); - - LOG_INFO(log, "Disk {} started up", name); -} - -void DiskS3::findLastRevision() -{ - /// Construct revision number from high to low bits. - String revision; - revision.reserve(64); - for (int bit = 0; bit < 64; ++bit) - { - auto revision_prefix = revision + "1"; - - LOG_TRACE(log, "Check object exists with revision prefix {}", revision_prefix); - - /// Check file or operation with such revision prefix exists. - if (checkObjectExists(bucket, remote_fs_root_path + "r" + revision_prefix) - || checkObjectExists(bucket, remote_fs_root_path + "operations/r" + revision_prefix)) - revision += "1"; - else - revision += "0"; - } - revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); - LOG_INFO(log, "Found last revision number {} for disk {}", revision_counter, name); -} - -int DiskS3::readSchemaVersion(const String & source_bucket, const String & source_path) -{ - int version = 0; - if (!checkObjectExists(source_bucket, source_path + SCHEMA_VERSION_OBJECT)) - return version; - - auto settings = current_settings.get(); - ReadBufferFromS3 buffer( - settings->client, - source_bucket, - source_path + SCHEMA_VERSION_OBJECT, - version_id, - settings->s3_settings.max_single_read_retries, - context->getReadSettings()); - - readIntText(version, buffer); - - return version; -} - -void DiskS3::saveSchemaVersion(const int & version) -{ - auto settings = current_settings.get(); - - WriteBufferFromS3 buffer( - settings->client, - bucket, - remote_fs_root_path + SCHEMA_VERSION_OBJECT, - settings->s3_settings); - - writeIntText(version, buffer); - buffer.finalize(); -} - -void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata) -{ - copyObjectImpl(bucket, key, bucket, key, std::nullopt, metadata); -} - -void DiskS3::migrateFileToRestorableSchema(const String & path) -{ - LOG_TRACE(log, "Migrate file {} to restorable schema", metadata_disk->getPath() + path); - - auto meta = readMetadata(path); - - for (const auto & [key, _] : meta.remote_fs_objects) - { - ObjectMetadata metadata { - {"path", path} - }; - updateObjectMetadata(remote_fs_root_path + key, metadata); - } -} - -void DiskS3::migrateToRestorableSchemaRecursive(const String & path, Futures & results) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. 
- - LOG_TRACE(log, "Migrate directory {} to restorable schema", metadata_disk->getPath() + path); - - bool dir_contains_only_files = true; - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (isDirectory(it->path())) - { - dir_contains_only_files = false; - break; - } - - /// The whole directory can be migrated asynchronously. - if (dir_contains_only_files) - { - auto result = getExecutor().execute([this, path] - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); - - results.push_back(std::move(result)); - } - else - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (!isDirectory(it->path())) - { - auto source_path = it->path(); - auto result = getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); - } - else - migrateToRestorableSchemaRecursive(it->path(), results); - } -} - -void DiskS3::migrateToRestorableSchema() -{ - try - { - LOG_INFO(log, "Start migration to restorable schema for disk {}", name); - - Futures results; - - for (const auto & root : data_roots) - if (exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - } - catch (const Exception &) - { - tryLogCurrentException(log, fmt::format("Failed to migrate to restorable schema for disk {}", name)); - - throw; - } -} - -bool DiskS3::checkObjectExists(const String & source_bucket, const String & prefix) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(source_bucket); - request.SetPrefix(prefix); - request.SetMaxKeys(1); - - auto outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - return !outcome.GetResult().GetContents().empty(); -} - -bool DiskS3::checkUniqueId(const String & id) const -{ - auto settings = current_settings.get(); - /// Check that we have right s3 and have access rights - /// Actually interprets id as s3 object name and checks if it exists - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(bucket); - request.SetPrefix(id); - - auto outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - Aws::Vector object_list = outcome.GetResult().GetContents(); - - for (const auto & object : object_list) - if (object.GetKey() == id) - return true; - return false; -} - -Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(source_bucket); - request.SetKey(key); - - auto outcome = settings->client->HeadObject(request); - throwIfError(outcome); - - return outcome.GetResultWithOwnership(); -} - -void DiskS3::listObjects(const String & source_bucket, const String & source_path, std::function callback) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(source_bucket); - request.SetPrefix(source_path); - request.SetMaxKeys(settings->list_object_keys_size); - - Aws::S3::Model::ListObjectsV2Outcome outcome; - do - { - outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - bool should_continue = callback(outcome.GetResult()); - - if (!should_continue) - break; - - 
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - } while (outcome.GetResult().GetIsTruncated()); -} - -void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head) const -{ - if (head && (head->GetContentLength() >= static_cast(5_GiB))) - copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head); - else - copyObjectImpl(src_bucket, src_key, dst_bucket, dst_key); -} - -void DiskS3::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head, - std::optional> metadata) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::CopyObjectRequest request; - request.SetCopySource(src_bucket + "/" + src_key); - request.SetBucket(dst_bucket); - request.SetKey(dst_key); - if (metadata) - { - request.SetMetadata(*metadata); - request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); - } - - auto outcome = settings->client->CopyObject(request); - - if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") - { // Can't come here with MinIO, MinIO allows single part upload for large objects. - copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); - return; - } - - throwIfError(outcome); -} - -void DiskS3::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head, - std::optional> metadata) const -{ - LOG_TRACE(log, "Multipart copy upload has created. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, Metadata: {}", - src_bucket, src_key, dst_bucket, dst_key, metadata ? "REPLACE" : "NOT_SET"); - - auto settings = current_settings.get(); - - if (!head) - head = headObject(src_bucket, src_key); - - size_t size = head->GetContentLength(); - - String multipart_upload_id; - - { - Aws::S3::Model::CreateMultipartUploadRequest request; - request.SetBucket(dst_bucket); - request.SetKey(dst_key); - if (metadata) - request.SetMetadata(*metadata); - - auto outcome = settings->client->CreateMultipartUpload(request); - - throwIfError(outcome); - - multipart_upload_id = outcome.GetResult().GetUploadId(); - } - - std::vector part_tags; - - size_t upload_part_size = settings->s3_settings.min_upload_part_size; - for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) - { - Aws::S3::Model::UploadPartCopyRequest part_request; - part_request.SetCopySource(src_bucket + "/" + src_key); - part_request.SetBucket(dst_bucket); - part_request.SetKey(dst_key); - part_request.SetUploadId(multipart_upload_id); - part_request.SetPartNumber(part_number); - part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); - - auto outcome = settings->client->UploadPartCopy(part_request); - if (!outcome.IsSuccess()) - { - Aws::S3::Model::AbortMultipartUploadRequest abort_request; - abort_request.SetBucket(dst_bucket); - abort_request.SetKey(dst_key); - abort_request.SetUploadId(multipart_upload_id); - settings->client->AbortMultipartUpload(abort_request); - // In error case we throw exception later with first error from UploadPartCopy - } - throwIfError(outcome); - - auto etag = outcome.GetResult().GetCopyPartResult().GetETag(); - part_tags.push_back(etag); - } - - { - Aws::S3::Model::CompleteMultipartUploadRequest req; - req.SetBucket(dst_bucket); - 
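
The UploadPartCopy loop above cuts the source object into min_upload_part_size pieces; the byte ranges are inclusive and the final one is clamped to the object size. A self-contained sketch of just that range arithmetic (object and part sizes are invented):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Reproduces the CopySourceRange computation: inclusive ranges, last part clamped to the object size.
static std::vector<std::string> partRanges(uint64_t size, uint64_t part_size)
{
    std::vector<std::string> ranges;
    for (uint64_t position = 0; position < size; position += part_size)
    {
        uint64_t last = std::min(size, position + part_size) - 1;
        ranges.push_back("bytes=" + std::to_string(position) + "-" + std::to_string(last));
    }
    return ranges;
}

int main()
{
    auto ranges = partRanges(10 * 1000 * 1000, 4 * 1000 * 1000);
    for (const auto & r : ranges)
        std::cout << r << '\n';
    // bytes=0-3999999, bytes=4000000-7999999, bytes=8000000-9999999: contiguous, inclusive, no overlap.
    assert(ranges.size() == 3);
}
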
req.SetKey(dst_key); - req.SetUploadId(multipart_upload_id); - - Aws::S3::Model::CompletedMultipartUpload multipart_upload; - for (size_t i = 0; i < part_tags.size(); ++i) - { - Aws::S3::Model::CompletedPart part; - multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); - } - - req.SetMultipartUpload(multipart_upload); - - auto outcome = settings->client->CompleteMultipartUpload(req); - - throwIfError(outcome); - - LOG_TRACE(log, "Multipart copy upload has completed. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, " - "Upload_id: {}, Parts: {}", src_bucket, src_key, dst_bucket, dst_key, multipart_upload_id, part_tags.size()); - } -} - -struct DiskS3::RestoreInformation -{ - UInt64 revision = LATEST_REVISION; - String source_bucket; - String source_path; - bool detached = false; -}; - -void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_information) -{ - const ReadSettings read_settings; - auto buffer = metadata_disk->readFile(RESTORE_FILE_NAME, read_settings, 512); - buffer->next(); - - try - { - std::map properties; - - while (buffer->hasPendingData()) - { - String property; - readText(property, *buffer); - assertChar('\n', *buffer); - - auto pos = property.find('='); - if (pos == String::npos || pos == 0 || pos == property.length()) - throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); - - auto key = property.substr(0, pos); - auto value = property.substr(pos + 1); - - auto it = properties.find(key); - if (it != properties.end()) - throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - - properties[key] = value; - } - - for (const auto & [key, value] : properties) - { - ReadBufferFromString value_buffer (value); - - if (key == "revision") - readIntText(restore_information.revision, value_buffer); - else if (key == "source_bucket") - readText(restore_information.source_bucket, value_buffer); - else if (key == "source_path") - readText(restore_information.source_path, value_buffer); - else if (key == "detached") - readBoolTextWord(restore_information.detached, value_buffer); - else - throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - } - } - catch (const Exception &) - { - tryLogCurrentException(log, "Failed to read restore information"); - throw; - } -} - -void DiskS3::restore() -{ - if (!exists(RESTORE_FILE_NAME)) - return; - - try - { - RestoreInformation information; - information.source_bucket = bucket; - information.source_path = remote_fs_root_path; - - readRestoreInformation(information); - if (information.revision == 0) - information.revision = LATEST_REVISION; - if (!information.source_path.ends_with('/')) - information.source_path += '/'; - - if (information.source_bucket == bucket) - { - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. - if (information.source_path == remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - - /// This case complicates S3 cleanup in case of unsuccessful restore. 
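
readRestoreInformation() above parses the local 'restore' file as newline-terminated key=value properties and rejects duplicate keys. For illustration only, a file like the following (the bucket and path are made up) would request a detached restore of everything up to revision 4096 from another bucket:

revision=4096
source_bucket=backup-bucket
source_path=data/backup_root/
detached=true
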
- if (information.source_path != remote_fs_root_path && remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); - } - - LOG_INFO(log, "Starting to restore disk {}. Revision: {}, Source bucket: {}, Source path: {}", - name, information.revision, information.source_bucket, information.source_path); - - if (readSchemaVersion(information.source_bucket, information.source_path) < RESTORABLE_SCHEMA_VERSION) - throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); - - LOG_INFO(log, "Removing old metadata..."); - - bool cleanup_s3 = information.source_bucket != bucket || information.source_path != remote_fs_root_path; - for (const auto & root : data_roots) - if (exists(root)) - removeSharedRecursive(root + '/', !cleanup_s3, {}); - - restoreFiles(information); - restoreFileOperations(information); - - metadata_disk->removeFile(RESTORE_FILE_NAME); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - - LOG_INFO(log, "Restore disk {} finished", name); - } - catch (const Exception &) - { - tryLogCurrentException(log, fmt::format("Failed to restore disk {}", name)); - - throw; - } -} - -void DiskS3::restoreFiles(const RestoreInformation & restore_information) -{ - LOG_INFO(log, "Starting restore files for disk {}", name); - - std::vector> results; - auto restore_files = [this, &restore_information, &results](auto list_result) - { - std::vector keys; - for (const auto & row : list_result.GetContents()) - { - const String & key = row.GetKey(); - - /// Skip file operations objects. They will be processed separately. - if (key.find("/operations/") != String::npos) - continue; - - const auto [revision, _] = extractRevisionAndOperationFromKey(key); - /// Filter early if it's possible to get revision from key. - if (revision > restore_information.revision) - continue; - - keys.push_back(key); - } - - if (!keys.empty()) - { - auto result = getExecutor().execute([this, &restore_information, keys]() - { - processRestoreFiles(restore_information.source_bucket, restore_information.source_path, keys); - }); - - results.push_back(std::move(result)); - } - - return true; - }; - - /// Execute. - listObjects(restore_information.source_bucket, restore_information.source_path, restore_files); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - LOG_INFO(log, "Files are restored for disk {}", name); -} - -void DiskS3::processRestoreFiles(const String & source_bucket, const String & source_path, Strings keys) -{ - for (const auto & key : keys) - { - auto head_result = headObject(source_bucket, key); - auto object_metadata = head_result.GetMetadata(); - - /// Restore file if object has 'path' in metadata. - auto path_entry = object_metadata.find("path"); - if (path_entry == object_metadata.end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - const auto & path = path_entry->second; - - createDirectories(directoryPath(path)); - auto relative_key = shrinkKey(source_path, key); - - /// Copy object if we restore to different bucket / path. 
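
processRestoreFiles() above restores each object under the configured root by stripping the source prefix with shrinkKey() and re-attaching the remainder, while the local path comes from the object's 'path' metadata attribute. A compilable sketch of that key manipulation (the prefixes and the key name are illustrative):

#include <cassert>
#include <iostream>
#include <stdexcept>
#include <string>

// Strip the source root from a key; the remainder is re-attached under the destination root.
static std::string shrinkKey(const std::string & path, const std::string & key)
{
    if (key.compare(0, path.size(), path) != 0)
        throw std::logic_error("The key " + key + " does not start with prefix " + path);
    return key.substr(path.size());
}

int main()
{
    const std::string source_path = "data/backup_root/";
    const std::string destination_root = "data/cluster_a/";

    std::string key = source_path + "r42-file-0001";
    std::string relative_key = shrinkKey(source_path, key);

    // The restored object keeps its relative name but lives under the destination root.
    std::cout << destination_root + relative_key << '\n';
    assert(destination_root + relative_key == "data/cluster_a/r42-file-0001");
}
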
- if (bucket != source_bucket || remote_fs_root_path != source_path) - copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key, head_result); - - auto updater = [relative_key, head_result] (Metadata & metadata) - { - metadata.addObject(relative_key, head_result.GetContentLength()); - return true; - }; - - createUpdateAndStoreMetadata(path, false, updater); - - LOG_TRACE(log, "Restored file {}", path); - } -} - -void DiskS3::restoreFileOperations(const RestoreInformation & restore_information) -{ - auto settings = current_settings.get(); - - LOG_INFO(log, "Starting restore file operations for disk {}", name); - - /// Enable recording file operations if we restore to different bucket / path. - bool send_metadata = bucket != restore_information.source_bucket || remote_fs_root_path != restore_information.source_path; - - std::set renames; - auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](auto list_result) - { - const String rename = "rename"; - const String hardlink = "hardlink"; - - for (const auto & row : list_result.GetContents()) - { - const String & key = row.GetKey(); - - const auto [revision, operation] = extractRevisionAndOperationFromKey(key); - if (revision == UNKNOWN_REVISION) - { - LOG_WARNING(log, "Skip key {} with unknown revision", key); - continue; - } - - /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). - /// We can stop processing if revision of the object is already more than required. - if (revision > restore_information.revision) - return false; - - /// Keep original revision if restore to different bucket / path. - if (send_metadata) - revision_counter = revision - 1; - - auto object_metadata = headObject(restore_information.source_bucket, key).GetMetadata(); - if (operation == rename) - { - auto from_path = object_metadata["from_path"]; - auto to_path = object_metadata["to_path"]; - if (exists(from_path)) - { - moveFile(from_path, to_path, send_metadata); - LOG_TRACE(log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); - - if (restore_information.detached && isDirectory(to_path)) - { - /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. - if (!from_path.ends_with('/')) - from_path += '/'; - if (!to_path.ends_with('/')) - to_path += '/'; - - /// Always keep latest actual directory path to avoid 'detaching' not existing paths. - auto it = renames.find(from_path); - if (it != renames.end()) - renames.erase(it); - - renames.insert(to_path); - } - } - } - else if (operation == hardlink) - { - auto src_path = object_metadata["src_path"]; - auto dst_path = object_metadata["dst_path"]; - if (exists(src_path)) - { - createDirectories(directoryPath(dst_path)); - createHardLink(src_path, dst_path, send_metadata); - LOG_TRACE(log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); - } - } - } - - return true; - }; - - /// Execute. - listObjects(restore_information.source_bucket, restore_information.source_path + "operations/", restore_file_operations); - - if (restore_information.detached) - { - Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; - - for (const auto & path : renames) - { - /// Skip already detached parts. - if (path.find("/detached/") != std::string::npos) - continue; - - /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. 
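
The detached-parts handling that follows moves renamed part directories under 'detached'; pathToDetached() derives the target directory from the part path whether or not it ends with '/'. A standalone sketch of that transformation using std::filesystem (paths are illustrative):

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

// Forms '<table>/detached/' from '<table>/<part_name>/' or '<table>/<part_name>'.
static std::string pathToDetached(const std::string & source_path)
{
    fs::path p(source_path);
    /// A trailing '/' makes the last path element empty, so strip one more level in that case.
    if (!source_path.empty() && source_path.back() == '/')
        p = p.parent_path();
    return (p.parent_path() / "detached/").string();
}

int main()
{
    std::cout << pathToDetached("store/123/table/all_1_1_0/") << '\n';  // store/123/table/detached/
    std::cout << pathToDetached("store/123/table/all_1_1_0") << '\n';   // store/123/table/detached/
}
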
- fs::path directory_path(path); - auto directory_name = directory_path.parent_path().filename().string(); - - auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; - if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) - continue; - - auto detached_path = pathToDetached(path); - - LOG_TRACE(log, "Move directory to 'detached' {} -> {}", path, detached_path); - - fs::path from_path = fs::path(path); - fs::path to_path = fs::path(detached_path); - if (path.ends_with('/')) - to_path /= from_path.parent_path().filename(); - else - to_path /= from_path.filename(); - - /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename - if (metadata_disk->exists(to_path)) - metadata_disk->removeRecursive(to_path); - - createDirectories(directoryPath(to_path)); - metadata_disk->moveDirectory(from_path, to_path); - } - } - - LOG_INFO(log, "File operations restored for disk {}", name); -} - -std::tuple DiskS3::extractRevisionAndOperationFromKey(const String & key) -{ - String revision_str; - String operation; - - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); - - return {(revision_str.empty() ? UNKNOWN_REVISION : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; -} - -String DiskS3::shrinkKey(const String & path, const String & key) -{ - if (!key.starts_with(path)) - throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); - - return key.substr(path.length()); -} - -String DiskS3::revisionToString(UInt64 revision) -{ - return std::bitset<64>(revision).to_string(); -} - -String DiskS3::pathToDetached(const String & source_path) -{ - if (source_path.ends_with('/')) - return fs::path(source_path).parent_path().parent_path() / "detached/"; - return fs::path(source_path).parent_path() / "detached/"; -} - -void DiskS3::onFreeze(const String & path) -{ - createDirectories(path); - auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); - writeIntText(revision_counter.load(), *revision_file_buf); - revision_file_buf->finalize(); -} - -void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) -{ - auto new_settings = settings_getter(config, "storage_configuration.disks." 
+ name, context_); - - current_settings.set(std::move(new_settings)); - - if (AsyncExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(current_settings.get()->thread_pool_size); -} - -DiskS3Settings::DiskS3Settings( - const std::shared_ptr & client_, - const S3Settings::ReadWriteSettings & s3_settings_, - size_t min_bytes_for_seek_, - bool send_metadata_, - int thread_pool_size_, - int list_object_keys_size_, - int objects_chunk_size_to_delete_) - : client(client_) - , s3_settings(s3_settings_) - , min_bytes_for_seek(min_bytes_for_seek_) - , send_metadata(send_metadata_) - , thread_pool_size(thread_pool_size_) - , list_object_keys_size(list_object_keys_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) -{ -} - -} - -#endif diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h deleted file mode 100644 index a909d08f6fa..00000000000 --- a/src/Disks/S3/DiskS3.h +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include "Disks/DiskFactory.h" -#include "Disks/Executor.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -/// Settings for DiskS3 that can be changed in runtime. -struct DiskS3Settings -{ - DiskS3Settings( - const std::shared_ptr & client_, - const S3Settings::ReadWriteSettings & s3_settings_, - size_t min_bytes_for_seek_, - bool send_metadata_, - int thread_pool_size_, - int list_object_keys_size_, - int objects_chunk_size_to_delete_); - - std::shared_ptr client; - S3Settings::ReadWriteSettings s3_settings; - size_t min_bytes_for_seek; - bool send_metadata; - int thread_pool_size; - int list_object_keys_size; - int objects_chunk_size_to_delete; -}; - - -/** - * Storage for persisting data in S3 and metadata on the local disk. - * Files are represented by file in local filesystem (clickhouse_root/disks/disk_name/path/to/file) - * that contains S3 object key with actual data. 
- */ -class DiskS3 final : public IDiskRemote -{ -public: - using ObjectMetadata = std::map; - using Futures = std::vector>; - - using SettingsPtr = std::unique_ptr; - using GetDiskSettings = std::function; - - struct RestoreInformation; - - DiskS3( - String name_, - String bucket_, - String s3_root_path_, - String version_id_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - ContextPtr context_, - SettingsPtr settings_, - GetDiskSettings settings_getter_); - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - void removeFromRemoteFS(const std::vector & paths) override; - - void moveFile(const String & from_path, const String & to_path, bool send_metadata); - void moveFile(const String & from_path, const String & to_path) override; - - void createHardLink(const String & src_path, const String & dst_path) override; - void createHardLink(const String & src_path, const String & dst_path, bool send_metadata); - - DiskType getType() const override { return DiskType::S3; } - bool isRemote() const override { return true; } - - bool supportZeroCopyReplication() const override { return true; } - - bool supportParallelWrite() const override { return true; } - - void shutdown() override; - - void startup() override; - - /// Check file exists and ClickHouse has an access to it - /// Overrode in remote disk - /// Required for remote disk to ensure that replica has access to data written by other node - bool checkUniqueId(const String & id) const override; - - /// Dumps current revision counter into file 'revision.txt' at given path. - void onFreeze(const String & path) override; - - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; - -private: - void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata); - /// Converts revision to binary string with leading zeroes (64 bit). 
- static String revisionToString(UInt64 revision); - - bool checkObjectExists(const String & source_bucket, const String & prefix) const; - void findLastRevision(); - - int readSchemaVersion(const String & source_bucket, const String & source_path); - void saveSchemaVersion(const int & version); - void updateObjectMetadata(const String & key, const ObjectMetadata & metadata); - void migrateFileToRestorableSchema(const String & path); - void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const; - void listObjects(const String & source_bucket, const String & source_path, std::function callback) const; - void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt) const; - - void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt, - std::optional> metadata = std::nullopt) const; - void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt, - std::optional> metadata = std::nullopt) const; - - /// Restore S3 metadata files on file system. - void restore(); - void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(const RestoreInformation & restore_information); - void processRestoreFiles(const String & source_bucket, const String & source_path, std::vector keys); - void restoreFileOperations(const RestoreInformation & restore_information); - - /// Remove 'path' prefix from 'key' to get relative key. - /// It's needed to store keys to metadata files in RELATIVE_PATHS version. - static String shrinkKey(const String & path, const String & key); - std::tuple extractRevisionAndOperationFromKey(const String & key); - - /// Forms detached path '../../detached/part_name/' from '../../part_name/' - static String pathToDetached(const String & source_path); - - const String bucket; - - const String version_id; - - MultiVersion current_settings; - /// Gets disk settings from context. - GetDiskSettings settings_getter; - - std::atomic revision_counter = 0; - static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); - static constexpr UInt64 UNKNOWN_REVISION = 0; - - /// File at path {metadata_path}/restore contains metadata restore information - inline static const String RESTORE_FILE_NAME = "restore"; - - /// Key has format: ../../r{revision}-{operation} - const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; - - /// Object contains information about schema version. - inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. - static constexpr int RESTORABLE_SCHEMA_VERSION = 1; - /// Directories with data. 
- const std::vector data_roots {"data", "store"}; - - ContextPtr context; -}; - -} - -#endif diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp new file mode 100644 index 00000000000..c4cd3253a21 --- /dev/null +++ b/src/Disks/S3/diskSettings.cpp @@ -0,0 +1,127 @@ +#include + +#include + +namespace DB +{ + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); + rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); + rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); + rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); + rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); + + return std::make_unique( + rw_settings, + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".thread_pool_size", 16), + config.getInt(config_prefix + ".list_object_keys_size", 1000), + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); +} + +std::shared_ptr getProxyResolverConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) +{ + auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); + auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme"); + if (proxy_scheme != "http" && proxy_scheme != "https") + throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); + auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); + auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10); + + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", + endpoint.toString(), proxy_scheme, proxy_port); + + return std::make_shared(endpoint, proxy_scheme, proxy_port, cache_ttl); +} + +std::shared_ptr getProxyListConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) +{ + std::vector keys; + proxy_config.keys(prefix, keys); + + std::vector proxies; + for (const auto & key : keys) + if (startsWith(key, "uri")) + { + Poco::URI proxy_uri(proxy_config.getString(prefix + "." 
+ key)); + + if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") + throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); + if (proxy_uri.getHost().empty()) + throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); + + proxies.push_back(proxy_uri); + + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString()); + } + + if (!proxies.empty()) + return std::make_shared(proxies); + + return nullptr; +} + +std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) +{ + if (!config.has(prefix + ".proxy")) + return nullptr; + + std::vector config_keys; + config.keys(prefix + ".proxy", config_keys); + + if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) + { + if (resolver_configs > 1) + throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS); + + return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); + } + + return getProxyListConfiguration(prefix + ".proxy", config); +} + + +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( + config.getString(config_prefix + ".region", ""), + context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); + + S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); + if (uri.key.back() != '/') + throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); + + client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000); + client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); + client_configuration.endpointOverride = uri.endpoint; + + auto proxy_config = getProxyConfiguration(config_prefix, config); + if (proxy_config) + { + client_configuration.perRequestConfiguration + = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; + client_configuration.error_report + = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + } + + client_configuration.retryStrategy + = std::make_shared(config.getUInt(config_prefix + ".retry_attempts", 10)); + + return S3::ClientFactory::instance().create( + client_configuration, + uri.is_virtual_hosted_style, + config.getString(config_prefix + ".access_key_id", ""), + config.getString(config_prefix + ".secret_access_key", ""), + config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), + {}, + config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)), + config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))); +} + +} diff --git a/src/Disks/S3/diskSettings.h b/src/Disks/S3/diskSettings.h new file mode 100644 index 00000000000..d8784d0b5a5 --- /dev/null +++ b/src/Disks/S3/diskSettings.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +namespace DB +{ + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +#endif diff --git a/src/Disks/S3/parseConfig.h b/src/Disks/S3/parseConfig.h new file mode 100644 index 00000000000..2d14ce9468b --- /dev/null +++ b/src/Disks/S3/parseConfig.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include "Disks/DiskCacheWrapper.h" +#include "Storages/StorageS3Settings.h" +#include "ProxyConfiguration.h" +#include "ProxyListConfiguration.h" +#include "ProxyResolverConfiguration.h" +#include "Disks/DiskRestartProxy.h" +#include "Disks/DiskLocal.h" +#include "Disks/RemoteDisksCommon.h" +#include + + +namespace DB +{ + + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +std::shared_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + + +} diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index b145f805a23..6a052dfab02 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -10,19 +10,21 @@ #include #include -#include "DiskS3.h" -#include "Disks/DiskCacheWrapper.h" -#include "Storages/StorageS3Settings.h" -#include "ProxyConfiguration.h" -#include "ProxyListConfiguration.h" -#include "ProxyResolverConfiguration.h" -#include "Disks/DiskRestartProxy.h" -#include "Disks/DiskLocal.h" -#include "Disks/RemoteDisksCommon.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -31,6 +33,7 @@ namespace ErrorCodes namespace { + void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); @@ -48,130 +51,8 @@ void checkReadAccess(const String & disk_name, IDisk & disk) void checkRemoveAccess(IDisk & disk) { disk.removeFile("test_acl"); } -std::shared_ptr getProxyResolverConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) -{ - auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); - auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme"); - if (proxy_scheme != "http" && proxy_scheme != "https") - throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); - auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); - auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", - endpoint.toString(), proxy_scheme, proxy_port); - - return std::make_shared(endpoint, proxy_scheme, proxy_port, cache_ttl); } -std::shared_ptr getProxyListConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) -{ - std::vector keys; - proxy_config.keys(prefix, keys); - - std::vector proxies; - for (const auto & key : keys) - if (startsWith(key, "uri")) - { - Poco::URI proxy_uri(proxy_config.getString(prefix + "." 
+ key)); - - if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") - throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); - if (proxy_uri.getHost().empty()) - throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); - - proxies.push_back(proxy_uri); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString()); - } - - if (!proxies.empty()) - return std::make_shared(proxies); - - return nullptr; -} - -std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) -{ - if (!config.has(prefix + ".proxy")) - return nullptr; - - std::vector config_keys; - config.keys(prefix + ".proxy", config_keys); - - if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) - { - if (resolver_configs > 1) - throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS); - - return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); - } - - return getProxyListConfiguration(prefix + ".proxy", config); -} - -std::shared_ptr -getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - config.getString(config_prefix + ".region", ""), - context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); - - S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); - if (uri.key.back() != '/') - throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); - - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); - client_configuration.endpointOverride = uri.endpoint; - - auto proxy_config = getProxyConfiguration(config_prefix, config); - if (proxy_config) - { - client_configuration.perRequestConfiguration - = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; - } - - client_configuration.retryStrategy - = std::make_shared(config.getUInt(config_prefix + ".retry_attempts", 10)); - - return S3::ClientFactory::instance().create( - client_configuration, - uri.is_virtual_hosted_style, - config.getString(config_prefix + ".access_key_id", ""), - config.getString(config_prefix + ".secret_access_key", ""), - config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), - {}, - config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)), - config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))); -} - -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); - 
rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); - rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); - rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); - rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); - - return std::make_unique( - getClient(config, config_prefix, context), - rw_settings, - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getBool(config_prefix + ".send_metadata", false), - config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".list_object_keys_size", 1000), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); -} - -} - - void registerDiskS3(DiskFactory & factory) { auto creator = [](const String & name, @@ -191,16 +72,21 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - std::shared_ptr s3disk = std::make_shared( - name, - uri.bucket, - uri.key, - uri.version_id, - metadata_disk, - std::move(cache), - context, + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + + ObjectStoragePtr s3_storage = std::make_unique( + std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), - getSettings); + uri.version_id, uri.bucket); + + std::shared_ptr s3disk = std::make_shared( + name, + uri.key, + "DiskS3", + metadata_disk, + std::move(s3_storage), + DiskType::S3, + send_metadata); /// This code is used only to check access to the corresponding disk. 
if (!config.getBool(config_prefix + ".skip_access_check", false)) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp new file mode 100644 index 00000000000..c81e5549c92 --- /dev/null +++ b/src/Disks/S3ObjectStorage.cpp @@ -0,0 +1,436 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int S3_ERROR; + extern const int FILE_ALREADY_EXISTS; + extern const int UNKNOWN_FORMAT; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +namespace +{ + +template +void throwIfError(Aws::Utils::Outcome & response) +{ + if (!response.IsSuccess()) + { + const auto & err = response.GetError(); + throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); + } +} + +template +void throwIfError(const Aws::Utils::Outcome & response) +{ + if (!response.IsSuccess()) + { + const auto & err = response.GetError(); + throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); + } +} + +} + +Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const +{ + auto client_ptr = client.get(); + Aws::S3::Model::HeadObjectRequest request; + request.SetBucket(bucket_from); + request.SetKey(key); + + return client_ptr->HeadObject(request); +} + +bool S3ObjectStorage::exists(const std::string & path) const +{ + auto object_head = requestObjectHeadData(bucket, path); + if (!object_head.IsSuccess()) + { + if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + return false; + + throwIfError(object_head); + } + return true; +} + + +std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + + disk_read_settings.remote_fs_cache = cache; + } + + auto settings_ptr = s3_settings.get(); + + auto s3_impl = std::make_unique( + client.get(), bucket, version_id, common_path_prefix, blobs_to_read, + settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); + + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); + } + else + { + auto buf = std::make_unique(std::move(s3_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } +} + +std::unique_ptr S3ObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = s3_settings.get(); + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); +} + + +std::unique_ptr S3ObjectStorage::writeObject( + const std::string & path, + std::optional attributes, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings & write_settings) +{ + bool cache_on_write = cache + && fs::path(path).extension() != ".tmp" + && 
write_settings.enable_filesystem_cache_on_write_operations + && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; + + auto settings_ptr = s3_settings.get(); + auto s3_buffer = std::make_unique( + client.get(), + bucket, + path, + settings_ptr->s3_settings, + attributes, + buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), + cache_on_write ? cache : nullptr); + + return std::make_unique(std::move(s3_buffer), std::move(finalize_callback), path); +} + +void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + auto settings_ptr = s3_settings.get(); + auto client_ptr = client.get(); + + Aws::S3::Model::ListObjectsV2Request request; + request.SetBucket(bucket); + request.SetPrefix(path); + request.SetMaxKeys(settings_ptr->list_object_keys_size); + + Aws::S3::Model::ListObjectsV2Outcome outcome; + do + { + outcome = client_ptr->ListObjectsV2(request); + throwIfError(outcome); + + auto result = outcome.GetResult(); + auto objects = result.GetContents(); + for (const auto & object : objects) + children.emplace_back(object.GetKey(), object.GetSize()); + + if (objects.empty()) + break; + + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + } while (outcome.GetResult().GetIsTruncated()); +} + +void S3ObjectStorage::removeObject(const std::string & path) +{ + auto client_ptr = client.get(); + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects({obj}); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); +} + +void S3ObjectStorage::removeObjects(const std::vector & paths) +{ + if (paths.empty()) + return; + + auto client_ptr = client.get(); + std::vector keys; + keys.reserve(paths.size()); + + for (const auto & path : paths) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + keys.push_back(obj); + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(keys); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); + +} + +void S3ObjectStorage::removeObjectIfExists(const std::string & path) +{ + auto client_ptr = client.get(); + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects({obj}); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + client_ptr->DeleteObjects(request); +} + +void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + if (paths.empty()) + return; + + auto client_ptr = client.get(); + + std::vector keys; + keys.reserve(paths.size()); + for (const auto & path : paths) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + keys.push_back(obj); + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(keys); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); +} + +ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const +{ + ObjectMetadata result; + + auto object_head = requestObjectHeadData(bucket, path); + throwIfError(object_head); + + auto & object_head_result = object_head.GetResult(); + result.size_bytes = 
object_head_result.GetContentLength(); + result.last_modified = object_head_result.GetLastModified().Millis(); + result.attributes = object_head_result.GetMetadata(); + + return result; +} + +void S3ObjectStorage::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional metadata) const +{ + auto client_ptr = client.get(); + Aws::S3::Model::CopyObjectRequest request; + request.SetCopySource(src_bucket + "/" + src_key); + request.SetBucket(dst_bucket); + request.SetKey(dst_key); + if (metadata) + { + request.SetMetadata(*metadata); + request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); + } + + auto outcome = client_ptr->CopyObject(request); + + if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") + { // Can't come here with MinIO, MinIO allows single part upload for large objects. + copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); + return; + } + + throwIfError(outcome); +} + +void S3ObjectStorage::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional metadata) const +{ + if (!head) + head = requestObjectHeadData(src_bucket, src_key).GetResult(); + + auto settings_ptr = s3_settings.get(); + auto client_ptr = client.get(); + size_t size = head->GetContentLength(); + + String multipart_upload_id; + + { + Aws::S3::Model::CreateMultipartUploadRequest request; + request.SetBucket(dst_bucket); + request.SetKey(dst_key); + if (metadata) + request.SetMetadata(*metadata); + + auto outcome = client_ptr->CreateMultipartUpload(request); + + throwIfError(outcome); + + multipart_upload_id = outcome.GetResult().GetUploadId(); + } + + std::vector part_tags; + + size_t upload_part_size = settings_ptr->s3_settings.min_upload_part_size; + for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) + { + Aws::S3::Model::UploadPartCopyRequest part_request; + part_request.SetCopySource(src_bucket + "/" + src_key); + part_request.SetBucket(dst_bucket); + part_request.SetKey(dst_key); + part_request.SetUploadId(multipart_upload_id); + part_request.SetPartNumber(part_number); + part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); + + auto outcome = client_ptr->UploadPartCopy(part_request); + if (!outcome.IsSuccess()) + { + Aws::S3::Model::AbortMultipartUploadRequest abort_request; + abort_request.SetBucket(dst_bucket); + abort_request.SetKey(dst_key); + abort_request.SetUploadId(multipart_upload_id); + client_ptr->AbortMultipartUpload(abort_request); + // In error case we throw exception later with first error from UploadPartCopy + } + throwIfError(outcome); + + auto etag = outcome.GetResult().GetCopyPartResult().GetETag(); + part_tags.push_back(etag); + } + + { + Aws::S3::Model::CompleteMultipartUploadRequest req; + req.SetBucket(dst_bucket); + req.SetKey(dst_key); + req.SetUploadId(multipart_upload_id); + + Aws::S3::Model::CompletedMultipartUpload multipart_upload; + for (size_t i = 0; i < part_tags.size(); ++i) + { + Aws::S3::Model::CompletedPart part; + multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); + } + + req.SetMultipartUpload(multipart_upload); + + auto outcome = client_ptr->CompleteMultipartUpload(req); + + throwIfError(outcome); + } +} + +void 
S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes) +{ + auto head = requestObjectHeadData(bucket, object_from).GetResult(); + if (head.GetContentLength() >= static_cast(5UL * 1024 * 1024 * 1024)) + copyObjectMultipartImpl(bucket, object_from, bucket, object_to, head, object_to_attributes); + else + copyObjectImpl(bucket, object_from, bucket, object_to, head, object_to_attributes); +} + +void S3ObjectStorage::setNewSettings(std::unique_ptr && s3_settings_) +{ + s3_settings.set(std::move(s3_settings_)); +} + +void S3ObjectStorage::setNewClient(std::unique_ptr && client_) +{ + client.set(std::move(client_)); +} + +void S3ObjectStorage::shutdown() +{ + auto client_ptr = client.get(); + /// This call stops any next retry attempts for ongoing S3 requests. + /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. + /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. + /// This should significantly speed up shutdown process if S3 is unhealthy. + const_cast(*client_ptr).DisableRequestProcessing(); +} + +void S3ObjectStorage::startup() +{ + auto client_ptr = client.get(); + + /// Need to be enabled if it was disabled during shutdown() call. + const_cast(*client_ptr.get()).EnableRequestProcessing(); +} + +void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + s3_settings.set(getSettings(config, config_prefix, context)); + client.set(getClient(config, config_prefix, context)); +} + +} diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h new file mode 100644 index 00000000000..b0762d07535 --- /dev/null +++ b/src/Disks/S3ObjectStorage.h @@ -0,0 +1,130 @@ +#pragma once +#include + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct S3ObjectStorageSettings +{ + + S3ObjectStorageSettings() = default; + + S3ObjectStorageSettings( + const S3Settings::ReadWriteSettings & s3_settings_, + uint64_t min_bytes_for_seek_, + uint64_t thread_pool_size_, + int32_t list_object_keys_size_, + int32_t objects_chunk_size_to_delete_) + : s3_settings(s3_settings_) + , min_bytes_for_seek(min_bytes_for_seek_) + , thread_pool_size(thread_pool_size_) + , list_object_keys_size(list_object_keys_size_) + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + {} + + S3Settings::ReadWriteSettings s3_settings; + + uint64_t min_bytes_for_seek; + uint64_t thread_pool_size; + int32_t list_object_keys_size; + int32_t objects_chunk_size_to_delete; +}; + + +class S3ObjectStorage : public IObjectStorage +{ +public: + S3ObjectStorage( + FileCachePtr && cache_, + std::unique_ptr && client_, + std::unique_ptr && s3_settings_, + String version_id_, + String bucket_) + : IObjectStorage(std::move(cache_)) + , bucket(bucket_) + , client(std::move(client_)) + , s3_settings(std::move(s3_settings_)) + , version_id(std::move(version_id_)) + {} + + bool exists(const std::string & path) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = 
ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. + void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) override; + + void setNewSettings(std::unique_ptr && s3_settings_); + + void setNewClient(std::unique_ptr && client_); + + void shutdown() override; + + void startup() override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + +private: + + void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; + + std::string bucket; + + MultiVersion client; + MultiVersion s3_settings; + + const String version_id; +}; + +} + +#endif diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c1b2ec7db0f..5b1d278f4c2 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes ReadBufferFromS3::ReadBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 5c8396cd6d8..c5f72c7414f 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -29,7 +29,7 @@ namespace DB class ReadBufferFromS3 : public SeekableReadBuffer, public WithFileName, public WithFileSize { private: - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; String bucket; String key; String version_id; @@ -48,7 +48,7 @@ private: public: ReadBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, @@ -94,7 +94,7 @@ class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory, public { public: explicit ReadBufferS3Factory( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, @@ -125,7 +125,7 @@ public: String getFileName() const override { return bucket + "/" + key; } private: - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; const String bucket; const String key; const String version_id; diff --git 
a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index c277332ef03..3732b662ea2 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -708,7 +708,7 @@ namespace S3 return ret; } - std::shared_ptr ClientFactory::create( // NOLINT + std::unique_ptr ClientFactory::create( // NOLINT const PocoHTTPClientConfiguration & cfg_, bool is_virtual_hosted_style, const String & access_key_id, @@ -746,7 +746,7 @@ namespace S3 use_environment_credentials, use_insecure_imds_request); - return std::make_shared( + return std::make_unique( std::move(auth_signer), std::move(client_configuration), // Client configuration. is_virtual_hosted_style || client_configuration.endpointOverride.empty() // Use virtual addressing only if endpoint is not specified. @@ -856,7 +856,7 @@ namespace S3 quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); } - size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error) + size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error) { Aws::S3::Model::HeadObjectRequest req; req.SetBucket(bucket); diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 16134f173d5..01f77cff820 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -31,7 +31,7 @@ public: static ClientFactory & instance(); - std::shared_ptr create( + std::unique_ptr create( const PocoHTTPClientConfiguration & cfg, bool is_virtual_hosted_style, const String & access_key_id, @@ -76,7 +76,7 @@ struct URI static void validateBucket(const String & bucket, const Poco::URI & uri); }; -size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); +size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 342a512ee52..8012ad95ec7 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -54,7 +54,7 @@ struct WriteBufferFromS3::PutObjectTask }; WriteBufferFromS3::WriteBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const S3Settings::ReadWriteSettings & s3_settings_, @@ -65,10 +65,10 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , object_metadata(std::move(object_metadata_)) , client_ptr(std::move(client_ptr_)) , upload_part_size(s3_settings_.min_upload_part_size) , s3_settings(s3_settings_) + , object_metadata(std::move(object_metadata_)) , schedule(std::move(schedule_)) , cache(cache_) { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 6279e519be0..7dbaad72940 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -48,7 +48,7 @@ class WriteBufferFromS3 final : public BufferWithOwnMemory { public: WriteBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const S3Settings::ReadWriteSettings & s3_settings_, @@ -90,10 +90,11 @@ private: String bucket; String key; - std::optional> object_metadata; - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; size_t upload_part_size = 0; S3Settings::ReadWriteSettings s3_settings; + std::optional> object_metadata; + /// Buffer to accumulate data. 
std::shared_ptr temporary_buffer; size_t last_part_size = 0; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 87be99aa246..691759892eb 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -184,7 +184,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { - if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::S3) + if (auto disk = reserved_space->getDisk(); disk->supportZeroCopyReplication()) { String dummy; if (!storage.findReplicaHavingCoveringPart(entry.new_part_name, true, dummy).empty()) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ff08ba0c062..f46bce1c87b 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -110,7 +110,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { - if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::S3) + if (auto disk = reserved_space->getDisk(); disk->supportZeroCopyReplication()) { String dummy; if (!storage.findReplicaHavingCoveringPart(entry.new_part_name, true, dummy).empty()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0e7c83742a4..fbdb1dabd88 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1219,7 +1219,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto disks = storage.getDisks(); bool only_s3_storage = true; for (const auto & disk : disks) - if (disk->getType() != DB::DiskType::S3) + if (!disk->supportZeroCopyReplication()) only_s3_storage = false; if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0])) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36080485aca..a21b5fd9efa 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7760,7 +7760,8 @@ String StorageReplicatedMergeTree::getSharedDataReplica( } -Strings StorageReplicatedMergeTree::getZeroCopyPartPath(const MergeTreeSettings & settings, DiskType disk_type, const String & table_uuid, +Strings StorageReplicatedMergeTree::getZeroCopyPartPath( + const MergeTreeSettings & settings, DiskType disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old) { Strings res; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6107c1a5117..2ab553ad450 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -84,7 +84,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl { public: - Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) + Impl(const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) : client(client_), globbed_uri(globbed_uri_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) @@ -171,7 +171,7 @@ private: bool is_finished{false}; }; -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) +StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(const 
Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) : pimpl(std::make_shared(client_, globbed_uri_)) {} String StorageS3Source::DisclosedGlobIterator::next() @@ -260,7 +260,7 @@ StorageS3Source::StorageS3Source( UInt64 max_block_size_, UInt64 max_single_read_retries_, String compression_hint_, - const std::shared_ptr & client_, + const std::shared_ptr & client_, const String & bucket_, const String & version_id_, std::shared_ptr file_iterator_, @@ -397,7 +397,7 @@ Chunk StorageS3Source::generate() return {}; } -static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) +static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) { bool is_finished = false; Aws::S3::Model::ListObjectsV2Request request; @@ -548,7 +548,6 @@ private: const Block sample_block; ContextPtr context; const CompressionMethod compression_method; - const StorageS3::S3Configuration & s3_configuration; const String bucket; const String key; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index cac5b3c270f..3fde17682d0 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -35,7 +35,7 @@ public: class DisclosedGlobIterator { public: - DisclosedGlobIterator(Aws::S3::S3Client &, const S3::URI &); + DisclosedGlobIterator(const Aws::S3::S3Client &, const S3::URI &); String next(); private: class Impl; @@ -82,7 +82,7 @@ public: UInt64 max_block_size_, UInt64 max_single_read_retries_, String compression_hint_, - const std::shared_ptr & client_, + const std::shared_ptr & client_, const String & bucket, const String & version_id, std::shared_ptr file_iterator_, @@ -104,7 +104,7 @@ private: UInt64 max_block_size; UInt64 max_single_read_retries; String compression_hint; - std::shared_ptr client; + std::shared_ptr client; Block sample_block; std::optional format_settings; @@ -191,7 +191,7 @@ public: const S3::URI uri; const String access_key_id; const String secret_access_key; - std::shared_ptr client; + std::shared_ptr client; S3Settings::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index b7ef3ce3ef2..c7041c05403 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -214,8 +214,9 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): # Wait for merges and old parts deletion for attempt in range(0, 10): parts_count = node.query( - "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' FORMAT Values" + "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' and active = 1 FORMAT Values" ) + if parts_count == "(1)": break @@ -228,7 +229,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): assert ( node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD) + wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45) @pytest.mark.parametrize("node_name", ["node"]) From ea389eeacaea7d2511f9375dd6e2ffda823aa89c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 12 May 2022 12:44:32 +0200 Subject: [PATCH 003/150] Fix fast test build --- src/Disks/S3ObjectStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index c81e5549c92..a941022a574 
100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -1,4 +1,8 @@ #include + + +#if USE_AWS_S3 + #include #include #include @@ -434,3 +438,5 @@ void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & } } + +#endif From c5b40a9c91fc40bf929e54ef23af2bb33ee8c928 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 12 May 2022 16:39:50 +0000 Subject: [PATCH 004/150] WIP on GROUPING function --- src/Core/NamesAndTypes.cpp | 14 ++++ src/Core/NamesAndTypes.h | 2 + .../registerFunctionsMiscellaneous.cpp | 2 + src/Interpreters/ActionsVisitor.cpp | 50 +++++++++++- src/Interpreters/ActionsVisitor.h | 3 + src/Interpreters/ExpressionAnalyzer.cpp | 7 ++ src/Interpreters/InterpreterSelectQuery.cpp | 4 + src/Parsers/ExpressionElementParsers.cpp | 16 ++++ src/Processors/QueryPlan/AggregatingStep.cpp | 28 ++++++- src/Storages/VirtualColumnUtils.cpp | 2 +- .../02293_grouping_function.reference | 81 +++++++++++++++++++ .../0_stateless/02293_grouping_function.sql | 79 ++++++++++++++++++ 12 files changed, 280 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02293_grouping_function.reference create mode 100644 tests/queries/0_stateless/02293_grouping_function.sql diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index bd24a9e82bd..72768ce23fb 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -214,4 +215,17 @@ std::optional NamesAndTypesList::tryGetByName(const std::string } return {}; } + +size_t NamesAndTypesList::getPosByName(const std::string &name) const noexcept +{ + size_t pos = 0; + for (const NameAndTypePair & column : *this) + { + if (column.name == name) + break; + ++pos; + } + return pos; +} + } diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 2719017a726..c7a51f51816 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -107,6 +107,8 @@ public: /// Try to get column by name, return empty optional if column not found std::optional tryGetByName(const std::string & name) const; + + size_t getPosByName(const std::string & name) const noexcept; }; using NamesAndTypesLists = std::vector; diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 9cd9c70da16..9fe1fa69b5e 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -83,6 +83,7 @@ void registerFunctionZooKeeperSessionUptime(FunctionFactory &); void registerFunctionGetOSKernelVersion(FunctionFactory &); void registerFunctionGetTypeSerializationStreams(FunctionFactory &); void registerFunctionFlattenTuple(FunctionFactory &); +void registerFunctionGrouping(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -172,6 +173,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetOSKernelVersion(factory); registerFunctionGetTypeSerializationStreams(factory); registerFunctionFlattenTuple(factory); + registerFunctionGrouping(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c57b85951bc..b7efbc97cc9 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include @@ -12,6 +15,7 @@ #include #include #include +#include #include #include @@ -459,14 
+463,23 @@ public: }; ActionsMatcher::Data::Data( - ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, ActionsDAGPtr actions_dag, - PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool no_makeset_, bool only_consts_, bool create_source_for_in_) + ContextPtr context_, + SizeLimits set_size_limit_, + size_t subquery_depth_, + const NamesAndTypesList & source_columns_, + const NamesAndTypesList & aggregation_keys_, + ActionsDAGPtr actions_dag, + PreparedSets & prepared_sets_, + SubqueriesForSets & subqueries_for_sets_, + bool no_subqueries_, + bool no_makeset_, + bool only_consts_, + bool create_source_for_in_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) , source_columns(source_columns_) + , aggregation_keys(aggregation_keys_) , prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) @@ -817,6 +830,35 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & return; } + if (node.name == "grouping") + { + auto arguments_column_name = data.getUniqueName("__grouping_args"); + { + ColumnWithTypeAndName column; + column.name = arguments_column_name; + column.type = std::make_shared(std::make_shared()); + Array arguments_to_keys_map; + for (auto const & arg : node.arguments->children) + { + size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + arguments_to_keys_map.push_back(pos); + } + auto arguments_column = ColumnArray::create(ColumnUInt64::create()); + arguments_column->insert(Field{arguments_to_keys_map}); + + column.column = ColumnConst::create(ColumnPtr(std::move(arguments_column)), 1); + + data.addColumn(column); + } + + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set_map", arguments_column_name }, + column_name + ); + return; + } + SetPtr prepared_set; if (checkFunctionIsInOrGlobalInOperator(node)) { diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d1558cb961c..313eae9fc8d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -87,6 +88,7 @@ public: SizeLimits set_size_limit; size_t subquery_depth; const NamesAndTypesList & source_columns; + const NamesAndTypesList & aggregation_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; @@ -108,6 +110,7 @@ public: SizeLimits set_size_limit_, size_t subquery_depth_, const NamesAndTypesList & source_columns_, + const NamesAndTypesList & aggregation_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index e7325363c08..8c3ea878718 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -47,6 +47,7 @@ #include #include +#include #include #include @@ -442,6 +443,9 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } + if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + if (group_asts.empty()) { select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); @@ -577,6 +581,7 @@ void 
ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, @@ -597,6 +602,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, @@ -618,6 +624,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( settings.size_limits_for_set, subquery_depth, sourceColumns(), + aggregation_keys, std::move(actions), prepared_sets, subqueries_for_sets, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..c8e04777574 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -756,6 +757,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } + if (analysis_result.use_grouping_set_key) + res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); + return res; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9150fee3bde..021d4356f41 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -801,6 +801,20 @@ namespace node = makeASTFunction("exists", subquery); return true; } + + bool parseGrouping(IParser::Pos & pos, ASTPtr & node, Expected & expected) + { + ASTPtr expr_list; + if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) + return false; + + auto res = std::make_shared(); + res->name = "grouping"; + res->arguments = expr_list; + res->children.push_back(res->arguments); + node = std::move(res); + return true; + } } @@ -886,6 +900,8 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") parsed_special_function = parseDateDiff(pos, node, expected); + else if (function_name_lowercase == "grouping") + parsed_special_function = parseGrouping(pos, node, expected); if (parsed_special_function.has_value()) return parsed_special_function.value() && ParserToken(TokenType::ClosingRoundBracket).ignore(pos); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index d7d62d07d92..87588facff2 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -12,7 +12,9 @@ #include #include #include +#include #include +#include namespace DB { @@ -33,7 +35,7 @@ static ITransformingStep::Traits getTraits() }; } -static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) +static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params, size_t keys_size) { if (params.empty()) return block; @@ -48,6 +50,10 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); + auto map_column = ColumnFixedString::create(keys_size + 1); + map_column->resize(rows); + res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); + return res; } @@ 
-63,7 +69,7 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) + : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_, params_.keys_size), getTraits(), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(std::move(final_)) @@ -210,7 +216,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B /// Here we create a DAG which fills missing keys and adds `__grouping_set` column auto dag = std::make_shared(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs index; - index.reserve(output_header.columns() + 1); + index.reserve(output_header.columns() + 2); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); const auto * grouping_node = &dag->addColumn( @@ -237,6 +243,22 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } + { + std::string grouping_map; + grouping_map.reserve(params.keys_size + 1); + std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); + for (auto key : params.keys) + grouping_map += key_set.contains(key) ? '1' : '0'; + grouping_map += '0'; + auto nested_column = ColumnFixedString::create(params.keys_size + 1); + nested_column->insertString(grouping_map); + auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); + const auto * grouping_map_node = &dag->addColumn( + {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); + grouping_map_node = &dag->materializeNode(*grouping_map_node); + index.push_back(grouping_map_node); + } + dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index d0840778c0f..99f3b86ac26 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference new file mode 100644 index 00000000000..5ea3ca4a15b --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -0,0 +1,81 @@ +0 2 +0 2 +0 4 +1 4 +2 4 +3 4 +4 4 +5 4 +6 4 +7 4 +8 4 +9 4 +0 1 +0 1 +0 4 +1 4 +2 4 +3 4 +4 4 +5 4 +6 4 +7 4 +8 4 +9 4 +0 0 +0 1 +0 1 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +0 +0 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 10 0 +0 1 4 +1 1 4 +2 1 4 +3 1 4 +4 1 4 +5 1 4 +6 1 4 +7 1 4 +8 1 4 +9 1 4 +0 1 6 +1 1 6 
+2 1 6 +3 1 6 +4 1 6 +5 1 6 +6 1 6 +7 1 6 +8 1 6 +9 1 6 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +0 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql new file mode 100644 index 00000000000..65771fd479d --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -0,0 +1,79 @@ +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 3, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2, number % 3) = 2 AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, grouping(number, number % 2, number % 3) = 2; + +SELECT + number, + count(), + grouping(number, number % 2, number % 3) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number, number % 2), + () + ) +ORDER BY (gr, number); + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2, number % 3) = 4 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2, number % 3) = 2 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; From 92575fc3e544121feafdcb9a86319ee57d9d393c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 12 May 2022 16:54:02 +0000 Subject: [PATCH 005/150] Add missing file --- src/Functions/grouping.cpp | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/Functions/grouping.cpp diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp new file mode 100644 index 00000000000..19e810edbd2 --- /dev/null +++ b/src/Functions/grouping.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class FunctionGrouping : public IFunction +{ +public: + static constexpr auto name = "grouping"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + bool isVariadic() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool isSuitableForConstantFolding() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + String getName() const override + { + return name; + } + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + //TODO: add assert for argument types + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); + auto result = 
std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto mask = grouping_set_column->getDataAt(i).toView(); + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); + auto indexes = (*argument_keys_column)[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (mask[index.get()] == '1' ? 1 : 0); + LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}, Arg: {}, value: {}", mask, toString(indexes), value); + result->insert(Field(value)); + } + return result; + } + +}; + +void registerFunctionGrouping(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} From d8580c8cb8e741f651d4ca5a2407d8e57a4eff07 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 12 May 2022 19:51:04 +0200 Subject: [PATCH 006/150] Digging --- src/Disks/DiskObjectStorage.cpp | 111 +++++++++++++++++++------------- src/Disks/DiskObjectStorage.h | 14 ++-- src/Disks/IObjectStorage.cpp | 12 ++++ src/Disks/IObjectStorage.h | 6 ++ src/Disks/S3/registerDiskS3.cpp | 10 ++- src/Disks/S3ObjectStorage.cpp | 26 +++++++- src/Disks/S3ObjectStorage.h | 11 ++-- 7 files changed, 133 insertions(+), 57 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 8fbde6dc6ca..04adebf1e82 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -569,16 +569,6 @@ void DiskObjectStorage::startup() LOG_INFO(log, "Starting up disk {}", name); object_storage->startup(); - if (send_metadata) - { - metadata_helper->restore(); - - if (metadata_helper->readSchemaVersion(remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) - metadata_helper->migrateToRestorableSchema(); - - metadata_helper->findLastRevision(); - } - LOG_INFO(log, "Disk {} started up", name); } @@ -674,6 +664,26 @@ void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration object_storage->applyNewSettings(config, "storage_configuration.disks." 
+ name, context_); } +void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + if (send_metadata) + { + LOG_DEBUG(log, "START RESTORING METADATA"); + metadata_helper->restore(config, config_prefix, context); + + if (metadata_helper->readSchemaVersion(object_storage.get(), remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) + { + LOG_DEBUG(log, "DONE READING"); + metadata_helper->migrateToRestorableSchema(); + LOG_DEBUG(log, "MIGRATION FINISHED"); + } + + LOG_DEBUG(log, "SEARCHING LAST REVISION"); + metadata_helper->findLastRevision(); + LOG_DEBUG(log, "DONE RESTORING METADATA"); + } +} + DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { if (i != 0) @@ -750,14 +760,14 @@ void DiskObjectStorageMetadataHelper::findLastRevision() LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); } -int DiskObjectStorageMetadataHelper::readSchemaVersion(const String & source_path) const +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const { const std::string path = source_path + SCHEMA_VERSION_OBJECT; int version = 0; - if (!disk->object_storage->exists(path)) + if (!object_storage->exists(path)) return version; - auto buf = disk->object_storage->readObject(path); + auto buf = object_storage->readObject(path); readIntText(version, *buf); return version; @@ -800,20 +810,22 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const S bool dir_contains_only_files = true; for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + { if (disk->isDirectory(it->path())) { dir_contains_only_files = false; break; } + } /// The whole directory can be migrated asynchronously. if (dir_contains_only_files) { auto result = disk->getExecutor().execute([this, path] - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + migrateFileToRestorableSchema(it->path()); + }); results.push_back(std::move(result)); } @@ -863,15 +875,18 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() } } -void DiskObjectStorageMetadataHelper::restore() +void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { if (!disk->exists(RESTORE_FILE_NAME)) + { return; + } try { RestoreInformation information; information.source_path = disk->remote_fs_root_path; + information.source_namespace = disk->object_storage->getObjectsNamespace(); readRestoreInformation(information); if (information.revision == 0) @@ -879,19 +894,28 @@ void DiskObjectStorageMetadataHelper::restore() if (!information.source_path.ends_with('/')) information.source_path += '/'; - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. 
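
migrateToRestorableSchemaRecursive above pushes one task per leaf directory onto the disk's executor and keeps the returned futures so the caller can later wait on (and propagate failures from) all of them. A rough standalone sketch of the same pattern, using std::async in place of the disk executor and a hypothetical placeholder for the per-file migration:

#include <filesystem>
#include <future>
#include <vector>

// Hypothetical stand-in for migrateFileToRestorableSchema().
void migrateFile(const std::filesystem::path & /*file*/) {}

void migrateTree(const std::filesystem::path & dir, std::vector<std::future<void>> & results)
{
    bool dir_contains_only_files = true;
    for (const auto & entry : std::filesystem::directory_iterator(dir))
    {
        if (entry.is_directory())
        {
            dir_contains_only_files = false;
            break;
        }
    }

    if (dir_contains_only_files)
    {
        // The whole directory is migrated by a single background task.
        results.push_back(std::async(std::launch::async, [dir]
        {
            for (const auto & entry : std::filesystem::directory_iterator(dir))
                migrateFile(entry.path());
        }));
    }
    // Otherwise the real code migrates files one by one and recurses into subdirectories.
}
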
- if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); + IObjectStorage * source_object_storage = disk->object_storage.get(); + if (information.source_namespace == disk->object_storage->getObjectsNamespace()) + { + /// In this case we need to additionally cleanup S3 from objects with later revision. + /// Will be simply just restore to different path. + if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) + throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + /// This case complicates S3 cleanup in case of unsuccessful restore. + if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) + throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + } + else + { + object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); + source_object_storage = object_storage_from_another_namespace.get(); + } LOG_INFO(disk->log, "Starting to restore disk {}. Revision: {}, Source path: {}", disk->name, information.revision, information.source_path); - if (readSchemaVersion(information.source_path) < RESTORABLE_SCHEMA_VERSION) + if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); LOG_INFO(disk->log, "Removing old metadata..."); @@ -901,8 +925,8 @@ void DiskObjectStorageMetadataHelper::restore() if (disk->exists(root)) disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); - restoreFiles(information); - restoreFileOperations(information); + restoreFiles(source_object_storage, information); + restoreFileOperations(source_object_storage, information); disk->metadata_disk->removeFile(RESTORE_FILE_NAME); @@ -949,10 +973,12 @@ void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation for (const auto & [key, value] : properties) { - ReadBufferFromString value_buffer (value); + ReadBufferFromString value_buffer(value); if (key == "revision") readIntText(restore_information.revision, value_buffer); + else if (key == "source_bucket" || key == "source_namespace") + readText(restore_information.source_namespace, value_buffer); else if (key == "source_path") readText(restore_information.source_path, value_buffer); else if (key == "detached") @@ -988,12 +1014,12 @@ static std::tuple extractRevisionAndOperationFromKey(const Strin return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; } -void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); std::vector> results; - auto restore_files = [this, &restore_information, &results](const BlobsPathToSize & keys) + auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) { std::vector keys_names; for (const auto & [key, size] : keys) @@ -1012,9 +1038,9 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re if (!keys_names.empty()) { - auto result = disk->getExecutor().execute([this, &restore_information, keys_names]() + auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() { - processRestoreFiles(restore_information.source_path, keys_names); + processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); }); results.push_back(std::move(result)); @@ -1024,7 +1050,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re }; BlobsPathToSize children; - disk->object_storage->listPrefix(restore_information.source_path, children); + source_object_storage->listPrefix(restore_information.source_path, children); restore_files(children); for (auto & result : results) @@ -1036,11 +1062,11 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re } -void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_path, std::vector keys) +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) { for (const auto & key : keys) { - auto meta = disk->object_storage->getObjectMetadata(key); + auto meta = source_object_storage->getObjectMetadata(key); auto object_attributes = meta.attributes; String path; @@ -1066,7 +1092,7 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_ /// Copy object if we restore to different bucket / path. if (disk->remote_fs_root_path != source_path) - disk->object_storage->copyObject(key, disk->remote_fs_root_path + relative_key); + source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) { @@ -1088,13 +1114,13 @@ static String pathToDetached(const String & source_path) return fs::path(source_path).parent_path() / "detached/"; } -void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { /// Enable recording file operations if we restore to different bucket / path. 
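
extractRevisionAndOperationFromKey above decodes the revision with std::bitset<64>: revisions are apparently embedded into object keys as fixed-width binary strings (presumably so that lexicographic key order matches numeric revision order), and to_ullong() turns such a string back into a number. A small standalone illustration of that round trip:

#include <bitset>
#include <cstdint>
#include <iostream>
#include <string>

uint64_t decodeRevision(const std::string & revision_str)
{
    return revision_str.empty() ? 0 : std::bitset<64>(revision_str).to_ullong();
}

int main()
{
    std::cout << decodeRevision("101") << '\n';                            // 5
    std::cout << decodeRevision(std::bitset<64>(42).to_string()) << '\n';  // 42
    std::cout << decodeRevision("") << '\n';                               // 0, as in the helper above
}
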
- bool send_metadata = disk->remote_fs_root_path != restore_information.source_path; + bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; std::set renames; - auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) { const String rename = "rename"; const String hardlink = "hardlink"; @@ -1117,7 +1143,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(disk->object_storage->getObjectMetadata(key).attributes); + auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); if (operation == rename) { auto from_path = object_attributes["from_path"]; @@ -1180,7 +1206,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma }; BlobsPathToSize children; - disk->object_storage->listPrefix(restore_information.source_path + "operations/", children); + source_object_storage->listPrefix(restore_information.source_path + "operations/", children); restore_file_operations(children); if (restore_information.detached) @@ -1224,5 +1250,4 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); } - } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 2147f9527d5..7e5d30dfea2 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -164,6 +164,7 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; + void restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); private: const String name; const String remote_fs_root_path; @@ -284,6 +285,7 @@ public: struct RestoreInformation { UInt64 revision = LATEST_REVISION; + String source_namespace; String source_path; bool detached = false; }; @@ -293,18 +295,18 @@ public: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; void findLastRevision(); - int readSchemaVersion(const String & source_path) const; + int readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const; void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; void migrateToRestorableSchemaRecursive(const String & path, Futures & results); void migrateToRestorableSchema(); - void restore(); + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(const RestoreInformation & restore_information); - void processRestoreFiles(const String & source_path, std::vector keys); - void restoreFileOperations(const RestoreInformation & restore_information); + void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + void processRestoreFiles(IObjectStorage * 
source_object_storage, const String & source_path, const std::vector & keys); + void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); std::atomic revision_counter = 0; inline static const String RESTORE_FILE_NAME = "restore"; @@ -318,6 +320,8 @@ public: DiskObjectStorage * disk; + ObjectStoragePtr object_storage_from_another_namespace; + ReadSettings read_settings; }; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index ac8f3fc39e8..44b9430172b 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -34,4 +35,15 @@ void IObjectStorage::removeFromCache(const std::string & path) } } +void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +{ + if (&object_storage_to == this) + copyObject(object_from, object_to, object_to_attributes); + + auto in = readObject(object_from); + auto out = object_storage_to.writeObject(object_to); + copyData(*in, *out); + out->finalize(); +} + } diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index f2cc9b90294..6a66ffb622e 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -97,6 +97,8 @@ public: virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + virtual void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}); + virtual ~IObjectStorage() = default; std::string getCacheBasePath() const; @@ -113,6 +115,10 @@ public: virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + virtual String getObjectsNamespace() const = 0; + + virtual std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + protected: FileCachePtr cache; }; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 6a052dfab02..54b736788fa 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -79,7 +79,7 @@ void registerDiskS3(DiskFactory & factory) getSettings(config, config_prefix, context), uri.version_id, uri.bucket); - std::shared_ptr s3disk = std::make_shared( + std::shared_ptr s3disk = std::make_shared( name, uri.key, "DiskS3", @@ -98,6 +98,9 @@ void registerDiskS3(DiskFactory & factory) s3disk->startup(); + s3disk->restoreMetadataIfNeeded(config, config_prefix, context); + + std::shared_ptr disk_result = s3disk; #ifdef NDEBUG bool use_cache = true; @@ -110,10 +113,11 @@ void registerDiskS3(DiskFactory & factory) if (config.getBool(config_prefix + ".cache_enabled", use_cache)) { String cache_path = config.getString(config_prefix + ".cache_path", context->getPath() + "disks/" + name + "/cache/"); - s3disk = wrapWithCache(s3disk, "s3-cache", cache_path, metadata_path); + disk_result = wrapWithCache(disk_result, "s3-cache", cache_path, metadata_path); } - return std::make_shared(s3disk); + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "DONE DISK"); + return std::make_shared(disk_result); }; factory.registerDiskType("s3", creator); } diff --git a/src/Disks/S3ObjectStorage.cpp 
b/src/Disks/S3ObjectStorage.cpp index a941022a574..0a7bd45d546 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -81,11 +81,15 @@ bool S3ObjectStorage::exists(const std::string & path) const auto object_head = requestObjectHeadData(bucket, path); if (!object_head.IsSuccess()) { - if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT DOESNT {} EXISTS", path); return false; + } throwIfError(object_head); } + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT {} EXISTS", path); return true; } @@ -291,6 +295,15 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons return result; } +void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +{ + /// Shortcut for S3 + if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) + copyObjectImpl(bucket, object_from, dest_s3->bucket, object_to, {}, object_to_attributes); + else + IObjectStorage::copyObjectToAnotherObjectStorage(object_from, object_to, object_storage_to, object_to_attributes); +} + void S3ObjectStorage::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, std::optional head, std::optional metadata) const @@ -428,7 +441,7 @@ void S3ObjectStorage::startup() auto client_ptr = client.get(); /// Need to be enabled if it was disabled during shutdown() call. - const_cast(*client_ptr.get()).EnableRequestProcessing(); + const_cast(*client_ptr).EnableRequestProcessing(); } void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) @@ -437,6 +450,15 @@ void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & client.set(getClient(config, config_prefix, context)); } +std::unique_ptr S3ObjectStorage::cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + return std::make_unique( + nullptr, getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + version_id, new_namespace); } +} + + #endif diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index b0762d07535..7632a643130 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -17,7 +17,6 @@ namespace DB struct S3ObjectStorageSettings { - S3ObjectStorageSettings() = default; S3ObjectStorageSettings( @@ -95,9 +94,7 @@ public: void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) override; - void setNewSettings(std::unique_ptr && s3_settings_); - - void setNewClient(std::unique_ptr && client_); + void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}) override; void shutdown() override; @@ -105,7 +102,13 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + String getObjectsNamespace() const override { return bucket; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const 
Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; private: + void setNewSettings(std::unique_ptr && s3_settings_); + + void setNewClient(std::unique_ptr && client_); void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, std::optional head = std::nullopt, From c330c7703b6f44a9e906a74f551d4d734d6c61cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Sat, 7 May 2022 23:49:36 +0200 Subject: [PATCH 007/150] WIP: Seconds as floating point --- src/Core/SettingsFields.cpp | 69 ++++++++++++++++++++++++++++++++----- src/Core/SettingsFields.h | 2 +- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index c545ae753de..261a2822da8 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { @@ -176,27 +178,75 @@ UInt64 SettingFieldMaxThreads::getAuto() return getNumberOfPhysicalCPUCores(); } +namespace +{ + Poco::Timespan::TimeDiff float64AsSecondsToTimespan(Float64 d) + { + if (!std::isnormal(d) || std::signbit(d)) + throw Exception( + ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a positive normal floating point number"); + return static_cast(d *= 1000000); + } -template -SettingFieldTimespan::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber(f)) +} + +template <> +SettingFieldSeconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(float64AsSecondsToTimespan(fieldToNumber(f))) { } -template -SettingFieldTimespan & SettingFieldTimespan::operator=(const Field & f) +template <> +SettingFieldMilliseconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber(f)) +{ +} + +template <> +SettingFieldSeconds & SettingFieldSeconds::operator=(const Field & f) +{ + *this = Poco::Timespan{float64AsSecondsToTimespan(fieldToNumber(f))}; + return *this; +} + +template <> +SettingFieldMilliseconds & SettingFieldMilliseconds::operator=(const Field & f) { *this = fieldToNumber(f); return *this; } -template -String SettingFieldTimespan::toString() const +template <> +String SettingFieldSeconds::toString() const +{ + return ::DB::toString(static_cast(value.totalMicroseconds()) / microseconds_per_unit); +} + +template <> +String SettingFieldMilliseconds::toString() const { return ::DB::toString(operator UInt64()); } -template -void SettingFieldTimespan::parseFromString(const String & str) +template <> +SettingFieldSeconds::operator Field() const +{ + return static_cast(value.totalMicroseconds()) / microseconds_per_unit; +} + +template <> +SettingFieldMilliseconds::operator Field() const +{ + return operator UInt64(); +} + +template <> +void SettingFieldSeconds::parseFromString(const String & str) +{ + Float64 n = parse(str.data(), str.size()); + *this = Poco::Timespan{static_cast(n * microseconds_per_unit)}; +} + +template <> +void SettingFieldMilliseconds::parseFromString(const String & str) { *this = stringToNumber(str); } @@ -204,6 +254,9 @@ void SettingFieldTimespan::parseFromString(const String & str) template void SettingFieldTimespan::writeBinary(WriteBuffer & out) const { + /// Note that this is unchanged and returns UInt64 for both seconds and milliseconds for + /// compatibility reasons as it's only used the clients or servers older than + /// DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS auto num_units = operator UInt64(); 
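
float64AsSecondsToTimespan above is what lets settings such as max_execution_time accept fractional seconds: the Float64 value is validated and converted to whole microseconds for the underlying Poco::Timespan. A standalone sketch mirroring that conversion (note that the strict isnormal check shown here is relaxed by a later commit in this series so that 0.0 is accepted):

#include <cmath>
#include <cstdint>
#include <stdexcept>

int64_t secondsToMicroseconds(double seconds)
{
    // Mirrors the initial validation above: reject negatives (including -0.0), NaN, Inf and subnormals.
    if (!std::isnormal(seconds) || std::signbit(seconds))
        throw std::invalid_argument("seconds must be a positive normal floating point number");
    return static_cast<int64_t>(seconds * 1000000);   // e.g. 1.1 s -> 1100000 us
}
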
writeVarUInt(num_units, out); } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 474786eb963..dcc99f4a2c0 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -124,7 +124,7 @@ struct SettingFieldTimespan operator std::chrono::duration() const { return std::chrono::duration_cast>(std::chrono::microseconds(value.totalMicroseconds())); } /// NOLINT explicit operator UInt64() const { return value.totalMicroseconds() / microseconds_per_unit; } - explicit operator Field() const { return operator UInt64(); } + explicit operator Field() const; Poco::Timespan::TimeDiff totalMicroseconds() const { return value.totalMicroseconds(); } Poco::Timespan::TimeDiff totalMilliseconds() const { return value.totalMilliseconds(); } From 002498bd2be9665fa090055d94141e534485e019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 13:52:41 +0200 Subject: [PATCH 008/150] Add tests to check decimal value in seconds --- src/Core/SettingsFields.cpp | 4 +- .../02294_decimal_second_errors.reference | 3 ++ .../02294_decimal_second_errors.sql | 9 ++++ ...loating_point_second_in_settings.reference | 8 ++++ ...02294_floating_point_second_in_settings.sh | 41 +++++++++++++++++++ 5 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02294_decimal_second_errors.reference create mode 100644 tests/queries/0_stateless/02294_decimal_second_errors.sql create mode 100644 tests/queries/0_stateless/02294_floating_point_second_in_settings.reference create mode 100755 tests/queries/0_stateless/02294_floating_point_second_in_settings.sh diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 261a2822da8..64971ccb685 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -182,9 +182,9 @@ namespace { Poco::Timespan::TimeDiff float64AsSecondsToTimespan(Float64 d) { - if (!std::isnormal(d) || std::signbit(d)) + if (std::signbit(d) || (d != 0.0 && !std::isnormal(d))) throw Exception( - ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a positive normal floating point number"); + ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a positive normal floating point number. 
Got {}", d); return static_cast(d *= 1000000); } diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.reference b/tests/queries/0_stateless/02294_decimal_second_errors.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02294_decimal_second_errors.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.sql b/tests/queries/0_stateless/02294_decimal_second_errors.sql new file mode 100644 index 00000000000..1beffc1e7e5 --- /dev/null +++ b/tests/queries/0_stateless/02294_decimal_second_errors.sql @@ -0,0 +1,9 @@ +SELECT 1 SETTINGS max_execution_time=NaN; -- { serverError 72 } +SELECT 1 SETTINGS max_execution_time=Infinity; -- { serverError 72 }; +SELECT 1 SETTINGS max_execution_time=-Infinity; -- { serverError 72 }; +SELECT 1 SETTINGS max_execution_time=-0.5; -- { serverError 72 }; +SELECT 1 SETTINGS max_execution_time=-0.000000000001; -- { serverError 72 }; +SELECT 1 SETTINGS max_execution_time=-0.0; -- { serverError 72 }; +SELECT 1 SETTINGS max_execution_time=0.0; +SELECT 1 SETTINGS max_execution_time=10.5; +SELECT 1 SETTINGS max_execution_time=10; diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference b/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference new file mode 100644 index 00000000000..f6216e2486a --- /dev/null +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference @@ -0,0 +1,8 @@ +TCP CLIENT +maximum: 1.1 +TCP CLIENT WITH SETTINGS IN QUERY +maximum: 1.1 +HTTP CLIENT +maximum: 1.1 +TABLE: system.settings +max_execution_time 0.1 1 diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh new file mode 100755 index 00000000000..78aece76e49 --- /dev/null +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e -o pipefail + + +MAX_TIMEOUT=1.1 # Use 1.1 because using 0.x truncates to 0 in older releases + +function check_output() { + MAXTIME_USED=$(echo "$1" | grep -Eo "maximum: [0-9]+\.[0-9]+" | head -n1 || true) + if [ "${MAXTIME_USED}" != "maximum: ${MAX_TIMEOUT}" ]; + then + echo "'$MAXTIME_USED' is not equal to 'maximum: ${MAX_TIMEOUT}'" + echo "OUTPUT: $1" + else + echo "$MAXTIME_USED" + fi +} + +# TCP CLIENT +echo "TCP CLIENT" +OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) +check_output "${OUTPUT}" + +echo "TCP CLIENT WITH SETTINGS IN QUERY" +OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) +check_output "${OUTPUT}" + +# HTTP CLIENT +echo "HTTP CLIENT" +OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \ + "SELECT count() FROM system.numbers" || true) +check_output "${OUTPUT}" + +# CHECK system.settings +echo "TABLE: system.settings" +echo "SELECT name, value, changed from system.settings where name = 'max_execution_time'" | clickhouse-client --max_execution_time 0.1 From 6d4bac321edd55bbfebd739bfa68767c1258659f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 15:08:37 +0200 Subject: [PATCH 009/150] Style --- src/Core/SettingsFields.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 64971ccb685..f13fd85758f 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH; extern const int CANNOT_PARSE_BOOL; + extern const int CANNOT_PARSE_NUMBER; } From d492b1c44dae3e5efa1af9fcbf6020653d64f397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 16:11:45 +0200 Subject: [PATCH 010/150] Fix timeout and better comment --- src/Core/SettingsFields.cpp | 2 +- tests/queries/0_stateless/02127_connection_drain.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index f13fd85758f..14c831f08f1 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -256,7 +256,7 @@ template void SettingFieldTimespan::writeBinary(WriteBuffer & out) const { /// Note that this is unchanged and returns UInt64 for both seconds and milliseconds for - /// compatibility reasons as it's only used the clients or servers older than + /// compatibility reasons as it's only used by the clients or servers older than /// DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS auto num_units = operator UInt64(); writeVarUInt(num_units, out); diff --git a/tests/queries/0_stateless/02127_connection_drain.sh b/tests/queries/0_stateless/02127_connection_drain.sh index 523b02d9bd5..597497e9b78 100755 --- a/tests/queries/0_stateless/02127_connection_drain.sh +++ b/tests/queries/0_stateless/02127_connection_drain.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # sync drain for _ in {1..100}; do prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") - curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=-1 format Null" + curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * 
from numbers(1e6))) limit 100 settings drain_timeout=0 format Null" now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") if [[ "$prev" != $(( now-2 )) ]]; then continue From ae81268d4d057da377d13eb04f16047d0ff2acf9 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 14:55:50 +0000 Subject: [PATCH 011/150] Try to compute helper column lazy --- src/Core/Names.h | 1 + src/Functions/grouping.cpp | 9 +++-- src/Interpreters/ActionsVisitor.cpp | 28 +++++++++++++- src/Interpreters/ActionsVisitor.h | 3 ++ src/Interpreters/ExpressionAnalyzer.cpp | 22 ++++++++--- src/Interpreters/ExpressionAnalyzer.h | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 4 +- src/Processors/QueryPlan/AggregatingStep.cpp | 40 ++++++++++---------- src/Storages/VirtualColumnUtils.cpp | 2 +- 9 files changed, 79 insertions(+), 32 deletions(-) diff --git a/src/Core/Names.h b/src/Core/Names.h index 3281daa560e..003168bde27 100644 --- a/src/Core/Names.h +++ b/src/Core/Names.h @@ -16,6 +16,7 @@ using NameOrderedSet = std::set; using NameToNameMap = std::unordered_map; using NameToNameSetMap = std::unordered_map; using NameToNameVector = std::vector>; +using NameToIndexMap = std::unordered_map; using NameWithAlias = std::pair; using NamesWithAliases = std::vector; diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index 19e810edbd2..c1b349ce9da 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,5 +1,6 @@ #include #include +#include "Columns/ColumnsNumber.h" #include #include #include @@ -49,14 +50,16 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) { - auto mask = grouping_set_column->getDataAt(i).toView(); + UInt64 set_index = grouping_set_column->get64(i); + auto mask = grouping_set_map_column->getDataAt(set_index).toView(); LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); auto indexes = (*argument_keys_column)[i].get(); UInt64 value = 0; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b7efbc97cc9..2e2cc49af58 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -11,6 +12,7 @@ #include #include +#include #include #include #include @@ -468,6 +470,7 @@ ActionsMatcher::Data::Data( size_t subquery_depth_, const NamesAndTypesList & source_columns_, const NamesAndTypesList & aggregation_keys_, + const ColumnNumbersList & grouping_set_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -480,6 +483,7 @@ ActionsMatcher::Data::Data( , subquery_depth(subquery_depth_) , source_columns(source_columns_) , aggregation_keys(aggregation_keys_) + , grouping_set_keys(grouping_set_keys_) , 
prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) @@ -834,6 +838,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { auto arguments_column_name = data.getUniqueName("__grouping_args"); { + if (!data.hasColumn("__grouping_set_map")) + { + ColumnWithTypeAndName column; + column.name = "__grouping_set_map"; + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + auto map_column = ColumnFixedString::create(map_size); + map_column->insertString(key_map); + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + + data.addColumn(column); + } ColumnWithTypeAndName column; column.name = arguments_column_name; column.type = std::make_shared(std::make_shared()); @@ -853,7 +879,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addFunction( FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set_map", arguments_column_name }, + { "__grouping_set", "__grouping_set_map", arguments_column_name }, column_name ); return; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 313eae9fc8d..b7d2905ac73 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -89,6 +90,7 @@ public: size_t subquery_depth; const NamesAndTypesList & source_columns; const NamesAndTypesList & aggregation_keys; + const ColumnNumbersList & grouping_set_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; @@ -111,6 +113,7 @@ public: size_t subquery_depth_, const NamesAndTypesList & source_columns_, const NamesAndTypesList & aggregation_keys_, + const ColumnNumbersList & grouping_set_keys_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8c3ea878718..1a2cb4ace1a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -43,6 +43,8 @@ #include #include +#include +#include #include #include @@ -326,7 +328,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { if (ASTPtr group_by_ast = select_query->groupBy()) { - NameSet unique_keys; + NameToIndexMap unique_keys; ASTs & group_asts = group_by_ast->children; /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column @@ -348,6 +350,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) group_elements_ast = group_ast_element->children; NamesAndTypesList grouping_set_list; + ColumnNumbers grouping_set_indexes_list; for (ssize_t j = 0; j < ssize_t(group_elements_ast.size()); ++j) { @@ -388,15 +391,21 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// Aggregation keys are unique. 
if (!unique_keys.contains(key.name)) { - unique_keys.insert(key.name); + unique_keys[key.name] = aggregation_keys.size(); + grouping_set_indexes_list.push_back(aggregation_keys.size()); aggregation_keys.push_back(key); /// Key is no longer needed, therefore we can save a little by moving it. aggregated_columns.push_back(std::move(key)); } + else + { + grouping_set_indexes_list.push_back(unique_keys[key.name]); + } } aggregation_keys_list.push_back(std::move(grouping_set_list)); + aggregation_keys_indexes_list.push_back(std::move(grouping_set_indexes_list)); } else { @@ -434,7 +443,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// Aggregation keys are uniqued. if (!unique_keys.contains(key.name)) { - unique_keys.insert(key.name); + unique_keys[key.name] = aggregation_keys.size(); aggregation_keys.push_back(key); /// Key is no longer needed, therefore we can save a little by moving it. @@ -443,8 +452,8 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) - aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + // if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + // aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); if (group_asts.empty()) { @@ -582,6 +591,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -603,6 +613,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -625,6 +636,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( subquery_depth, sourceColumns(), aggregation_keys, + aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b3704095c92..1200091efef 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -65,6 +66,7 @@ struct ExpressionAnalyzerData bool has_aggregation = false; NamesAndTypesList aggregation_keys; NamesAndTypesLists aggregation_keys_list; + ColumnNumbersList aggregation_keys_indexes_list; bool has_const_aggregation_keys = false; AggregateDescriptions aggregate_descriptions; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c8e04777574..5f165f9d535 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -757,8 +757,8 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } - if (analysis_result.use_grouping_set_key) - res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); + // if (analysis_result.use_grouping_set_key) + // res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); return res; } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 87588facff2..9c2b5a44914 100644 --- 
a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -35,7 +35,7 @@ static ITransformingStep::Traits getTraits() }; } -static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params, size_t keys_size) +static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) { if (params.empty()) return block; @@ -50,9 +50,9 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); - auto map_column = ColumnFixedString::create(keys_size + 1); - map_column->resize(rows); - res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); + // auto map_column = ColumnFixedString::create(keys_size + 1); + // map_column->resize(rows); + // res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); return res; } @@ -69,7 +69,7 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_, params_.keys_size), getTraits(), false) + : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(std::move(final_)) @@ -243,21 +243,21 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } - { - std::string grouping_map; - grouping_map.reserve(params.keys_size + 1); - std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); - for (auto key : params.keys) - grouping_map += key_set.contains(key) ? '1' : '0'; - grouping_map += '0'; - auto nested_column = ColumnFixedString::create(params.keys_size + 1); - nested_column->insertString(grouping_map); - auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); - const auto * grouping_map_node = &dag->addColumn( - {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); - grouping_map_node = &dag->materializeNode(*grouping_map_node); - index.push_back(grouping_map_node); - } + // { + // std::string grouping_map; + // grouping_map.reserve(params.keys_size + 1); + // std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); + // for (auto key : params.keys) + // grouping_map += key_set.contains(key) ? 
'1' : '0'; + // grouping_map += '0'; + // auto nested_column = ColumnFixedString::create(params.keys_size + 1); + // nested_column->insertString(grouping_map); + // auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); + // const auto * grouping_map_node = &dag->addColumn( + // {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); + // grouping_map_node = &dag->materializeNode(*grouping_map_node); + // index.push_back(grouping_map_node); + // } dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 99f3b86ac26..ef25612f63e 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); From eba60ff38f220b6c2e9ab6142ebd4760df8e706c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 13 May 2022 17:00:47 +0200 Subject: [PATCH 012/150] Fix restorable schema --- .../registerDiskAzureBlobStorage.cpp | 2 +- src/Disks/DiskDecorator.cpp | 4 +- src/Disks/DiskDecorator.h | 2 +- src/Disks/DiskLocal.cpp | 4 +- src/Disks/DiskLocal.h | 2 +- src/Disks/DiskObjectStorage.cpp | 114 +++++++++++++----- src/Disks/DiskRestartProxy.cpp | 7 +- src/Disks/DiskRestartProxy.h | 2 +- src/Disks/IDisk.h | 2 +- .../IO/WriteIndirectBufferFromRemoteFS.cpp | 3 +- src/Disks/S3/registerDiskS3.cpp | 4 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- 12 files changed, 100 insertions(+), 48 deletions(-) diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 8b2429263bb..56df793783e 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -103,7 +103,7 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) checkRemoveAccess(*azure_blob_storage_disk); } - azure_blob_storage_disk->startup(); + azure_blob_storage_disk->startup(context); if (config.getBool(config_prefix + ".cache_enabled", true)) { diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 80cfc23d210..02babfbb59f 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -211,9 +211,9 @@ void DiskDecorator::shutdown() delegate->shutdown(); } -void DiskDecorator::startup() +void DiskDecorator::startup(ContextPtr context) { - delegate->startup(); + delegate->startup(context); } void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index d707eb3e51d..b86c520d5d8 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -71,7 +71,7 @@ public: void onFreeze(const String & path) override; SyncGuardPtr getDirectorySyncGuard(const String & path) const override; void 
shutdown() override; - void startup() override; + void startup(ContextPtr context) override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; String getCacheBasePath() const override { return delegate->getCacheBasePath(); } std::vector getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index a55d588f2b5..e1e299a0d52 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -484,7 +484,7 @@ DiskLocal::DiskLocal( disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } -void DiskLocal::startup() +void DiskLocal::startup(ContextPtr) { try { @@ -672,7 +672,7 @@ void registerDiskLocal(DiskFactory & factory) std::shared_ptr disk = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); - disk->startup(); + disk->startup(context); return std::make_shared(disk); }; factory.registerDiskType("local", creator); diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61faccbe2a5..101bf0e1f13 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -110,7 +110,7 @@ public: bool isBroken() const override { return broken; } - void startup() override; + void startup(ContextPtr) override; void shutdown() override; diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 04adebf1e82..8f472c713b7 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -32,6 +32,12 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + + DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); @@ -340,16 +346,35 @@ size_t DiskObjectStorage::getFileSize(const String & path) const return readMetadata(path).total_size; } -void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path, bool should_send_metadata) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "MOVE FILE"); if (exists(to_path)) throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); + if (should_send_metadata) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter += 1; + + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + metadata_helper->createFileOperationObject("rename", revision, object_metadata); + } + metadata_disk->moveFile(from_path, to_path); } +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +{ + moveFile(from_path, to_path, send_metadata); +} + void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REPLACE FILE"); if (exists(to_path)) { const String tmp_path = to_path + ".old"; @@ -363,6 +388,7 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Remove shared file"); std::vector paths_to_remove; 
removeMetadata(path, paths_to_remove); @@ -372,6 +398,7 @@ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metada void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Read from remote FS"); object_storage->removeObjects(paths); } @@ -416,17 +443,35 @@ bool DiskObjectStorage::checkUniqueId(const String & id) const return checkObjectExists(id); } -void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path) +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "HARDLINK FILE"); readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); + if (should_send_metadata && !dst_path.starts_with("shadow/")) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter += 1; + const ObjectAttributes object_metadata { + {"src_path", src_path}, + {"dst_path", dst_path} + }; + metadata_helper->createFileOperationObject("hardlink", revision, object_metadata); + } + /// Create FS hardlink to metadata file. metadata_disk->createHardLink(src_path, dst_path); - } +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path) +{ + createHardLink(src_path, dst_path, send_metadata); +} + + void DiskObjectStorage::setReadOnly(const String & path) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "set readonly"); /// We should store read only flag inside metadata file (instead of using FS flag), /// because we modify metadata file when create hard-links from it. readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); @@ -560,15 +605,19 @@ void DiskObjectStorage::removeMetadataRecursive(const String & path, std::unorde void DiskObjectStorage::shutdown() { + LOG_INFO(log, "Shutting down disk {}", name); object_storage->shutdown(); + LOG_INFO(log, "Disk {} shut down", name); } -void DiskObjectStorage::startup() +void DiskObjectStorage::startup(ContextPtr context) { LOG_INFO(log, "Starting up disk {}", name); object_storage->startup(); + restoreMetadataIfNeeded(context->getConfigRef(), "storage_configuration.disks." 
+ name, context); + LOG_INFO(log, "Disk {} started up", name); } @@ -649,13 +698,24 @@ std::unique_ptr DiskObjectStorage::writeFile( { auto blob_name = getRandomASCIIString(); + std::optional object_attributes; + if (send_metadata) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter++; + object_attributes = { + {"path", path} + }; + blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; + } + auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, {}, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, object_attributes, create_metadata_callback, buf_size, settings); } @@ -725,10 +785,6 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() } } -static String revisionToString(UInt64 revision) -{ - return std::bitset<64>(revision).to_string(); -} void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { @@ -877,8 +933,11 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { + LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); + if (!disk->exists(RESTORE_FILE_NAME)) { + LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME); return; } @@ -925,6 +984,7 @@ void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfigur if (disk->exists(root)) disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + LOG_INFO(disk->log, "Old metadata removed, restoring new one"); restoreFiles(source_object_storage, information); restoreFileOperations(source_object_storage, information); @@ -1024,6 +1084,9 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec std::vector keys_names; for (const auto & [key, size] : keys) { + + LOG_INFO(disk->log, "Calling restore for key for disk {}", key); + /// Skip file operations objects. They will be processed separately. if (key.find("/operations/") != String::npos) continue; @@ -1051,6 +1114,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec BlobsPathToSize children; source_object_storage->listPrefix(restore_information.source_path, children); + restore_files(children); for (auto & result : results) @@ -1091,7 +1155,7 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc auto relative_key = shrinkKey(source_path, key); /// Copy object if we restore to different bucket / path. 
- if (disk->remote_fs_root_path != source_path) + if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) @@ -1107,6 +1171,14 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc } +void DiskObjectStorage::onFreeze(const String & path) +{ + createDirectories(path); + auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); + writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); + revision_file_buf->finalize(); +} + static String pathToDetached(const String & source_path) { if (source_path.ends_with('/')) @@ -1150,16 +1222,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou auto to_path = object_attributes["to_path"]; if (disk->exists(from_path)) { - disk->moveFile(from_path, to_path); - if (send_metadata) - { - auto next_revision = ++revision_counter; - const ObjectAttributes object_metadata { - {"from_path", from_path}, - {"to_path", to_path} - }; - createFileOperationObject("rename", next_revision, object_attributes); - } + disk->moveFile(from_path, to_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); @@ -1187,16 +1250,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou if (disk->exists(src_path)) { disk->createDirectories(directoryPath(dst_path)); - if (send_metadata && !dst_path.starts_with("shadow/")) - { - auto next_revision = ++revision_counter; - const ObjectAttributes object_metadata { - {"src_path", src_path}, - {"dst_path", dst_path} - }; - createFileOperationObject("hardlink", next_revision, object_attributes); - } - disk->createHardLink(src_path, dst_path); + disk->createHardLink(src_path, dst_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); } } diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 8bb31cec55f..903caf705c5 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -6,8 +6,7 @@ namespace DB { namespace ErrorCodes -{ - extern const int DEADLOCK_AVOIDED; +{extern const int DEADLOCK_AVOIDED; } using Millis = std::chrono::milliseconds; @@ -329,7 +328,7 @@ void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector< return DiskDecorator::getRemotePathsRecursive(path, paths_map); } -void DiskRestartProxy::restart() +void DiskRestartProxy::restart(ContextPtr context) { /// Speed up processing unhealthy requests. DiskDecorator::shutdown(); @@ -352,7 +351,7 @@ void DiskRestartProxy::restart() LOG_INFO(log, "Restart lock acquired. 
Restarting disk {}", DiskDecorator::getName()); - DiskDecorator::startup(); + DiskDecorator::startup(context); LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName()); } diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index d30c2fdbbfb..084e06e3f18 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -68,7 +68,7 @@ public: std::vector getRemotePaths(const String & path) const override; void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; - void restart(); + void restart(ContextPtr context); private: friend class RestartAwareReadBuffer; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 1071e1294b6..cf8b1a09ce9 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -297,7 +297,7 @@ public: virtual void shutdown() {} /// Performs action on disk startup. - virtual void startup() {} + virtual void startup(ContextPtr) {} /// Return some uniq string for file, overrode for IDiskRemote /// Required for distinguish different copies of the same part on remote disk diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp index dca2fb17ba7..77da60ca07d 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp @@ -36,7 +36,8 @@ WriteIndirectBufferFromRemoteFS::~WriteIndirectBufferFromRemoteFS() void WriteIndirectBufferFromRemoteFS::finalizeImpl() { WriteBufferFromFileDecorator::finalizeImpl(); - create_metadata_callback(count()); + if (create_metadata_callback) + create_metadata_callback(count()); } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 54b736788fa..b344375f05b 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -96,9 +96,7 @@ void registerDiskS3(DiskFactory & factory) checkRemoveAccess(*s3disk); } - s3disk->startup(); - - s3disk->restoreMetadataIfNeeded(config, config_prefix, context); + s3disk->startup(context); std::shared_ptr disk_result = s3disk; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index b52645c7854..d49ab933f23 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -780,7 +780,7 @@ void InterpreterSystemQuery::restartDisk(String & name) auto disk = getContext()->getDisk(name); if (DiskRestartProxy * restart_proxy = dynamic_cast(disk.get())) - restart_proxy->restart(); + restart_proxy->restart(getContext()); else throw Exception("Disk " + name + " doesn't have possibility to restart", ErrorCodes::BAD_ARGUMENTS); } From b6f4a1b9f9ecc0cf6aef72b72e68ec185f7b69cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 13 May 2022 17:21:04 +0200 Subject: [PATCH 013/150] Missed change --- src/Disks/DiskObjectStorage.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 7e5d30dfea2..83a3ba6b508 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -88,6 +88,8 @@ public: void moveFile(const String & from_path, const String & to_path) override; + void moveFile(const String & from_path, const String & to_path, bool should_send_metadata); + void replaceFile(const String & from_path, const String & to_path) override; void removeFile(const String & path) override { removeSharedFile(path, false); } @@ -119,6 +121,7 @@ public: bool checkUniqueId(const String & id) const override; void 
createHardLink(const String & src_path, const String & dst_path) override; + void createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata); void listFiles(const String & path, std::vector & file_names) override; @@ -146,7 +149,7 @@ public: void shutdown() override; - void startup() override; + void startup(ContextPtr context) override; ReservationPtr reserve(UInt64 bytes) override; @@ -165,6 +168,8 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; void restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void onFreeze(const String & path) override; private: const String name; const String remote_fs_root_path; From a2870ef65ec44ecf14ecb9ddc192d2705f7f01ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 17:28:05 +0200 Subject: [PATCH 014/150] Allow negative values for seconds --- src/Core/SettingsFields.cpp | 4 ++-- tests/queries/0_stateless/02127_connection_drain.sh | 2 +- .../0_stateless/02294_decimal_second_errors.reference | 3 +++ .../0_stateless/02294_decimal_second_errors.sql | 10 ++++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 14c831f08f1..a27013ed6cf 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -183,9 +183,9 @@ namespace { Poco::Timespan::TimeDiff float64AsSecondsToTimespan(Float64 d) { - if (std::signbit(d) || (d != 0.0 && !std::isnormal(d))) + if (d != 0.0 && !std::isnormal(d)) throw Exception( - ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a positive normal floating point number. Got {}", d); + ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. 
Got {}", d); return static_cast(d *= 1000000); } diff --git a/tests/queries/0_stateless/02127_connection_drain.sh b/tests/queries/0_stateless/02127_connection_drain.sh index 597497e9b78..523b02d9bd5 100755 --- a/tests/queries/0_stateless/02127_connection_drain.sh +++ b/tests/queries/0_stateless/02127_connection_drain.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # sync drain for _ in {1..100}; do prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") - curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=0 format Null" + curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=-1 format Null" now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") if [[ "$prev" != $(( now-2 )) ]]; then continue diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.reference b/tests/queries/0_stateless/02294_decimal_second_errors.reference index e8183f05f5d..a9e2f17562a 100644 --- a/tests/queries/0_stateless/02294_decimal_second_errors.reference +++ b/tests/queries/0_stateless/02294_decimal_second_errors.reference @@ -1,3 +1,6 @@ 1 1 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.sql b/tests/queries/0_stateless/02294_decimal_second_errors.sql index 1beffc1e7e5..7a073f64d9a 100644 --- a/tests/queries/0_stateless/02294_decimal_second_errors.sql +++ b/tests/queries/0_stateless/02294_decimal_second_errors.sql @@ -1,9 +1,11 @@ SELECT 1 SETTINGS max_execution_time=NaN; -- { serverError 72 } SELECT 1 SETTINGS max_execution_time=Infinity; -- { serverError 72 }; SELECT 1 SETTINGS max_execution_time=-Infinity; -- { serverError 72 }; -SELECT 1 SETTINGS max_execution_time=-0.5; -- { serverError 72 }; -SELECT 1 SETTINGS max_execution_time=-0.000000000001; -- { serverError 72 }; -SELECT 1 SETTINGS max_execution_time=-0.0; -- { serverError 72 }; + +-- Ok values +SELECT 1 SETTINGS max_execution_time=-0.5; +SELECT 1 SETTINGS max_execution_time=0.5; +SELECT 1 SETTINGS max_execution_time=-1; SELECT 1 SETTINGS max_execution_time=0.0; -SELECT 1 SETTINGS max_execution_time=10.5; +SELECT 1 SETTINGS max_execution_time=-0.0; SELECT 1 SETTINGS max_execution_time=10; From efb30bdf6446be2364aff687ed590cc8a02c8c46 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 18:20:12 +0000 Subject: [PATCH 015/150] Correctly use __grouping_set_map column --- src/Functions/grouping.cpp | 12 +++++------- src/Interpreters/ActionsVisitor.cpp | 2 -- .../0_stateless/02293_grouping_function.reference | 12 ++++++++++++ .../queries/0_stateless/02293_grouping_function.sql | 10 ++++++++++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index c1b349ce9da..1849bd0e9a5 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,14 +1,11 @@ #include -#include #include "Columns/ColumnsNumber.h" #include #include -#include #include #include #include #include -#include namespace DB { @@ -54,18 +51,19 @@ public: auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Args: {}, rows: {}", arguments.size(), arguments[1].column->getFamilyName()); + auto 
masks = (*grouping_set_map_column)[0].get(); + auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) { UInt64 set_index = grouping_set_column->get64(i); - auto mask = grouping_set_map_column->getDataAt(set_index).toView(); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}", mask); + auto mask = masks[set_index].get(); + auto indexes = (*argument_keys_column)[i].get(); UInt64 value = 0; for (auto index : indexes) value = (value << 1) + (mask[index.get()] == '1' ? 1 : 0); - LOG_DEBUG(&Poco::Logger::get("Grouping"), "Mask: {}, Arg: {}, value: {}", mask, toString(indexes), value); + result->insert(Field(value)); } return result; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 2e2cc49af58..40a5f055243 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -850,8 +850,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & std::string key_map(map_size, '0'); for (auto index : grouping_set) key_map[index] = '1'; - auto map_column = ColumnFixedString::create(map_size); - map_column->insertString(key_map); maps_per_set.push_back(key_map); } auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index 5ea3ca4a15b..f08e6d0ea99 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -79,3 +79,15 @@ 9 0 0 +0 0 +0 1 +0 1 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 65771fd479d..3555f9dabab 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -77,3 +77,13 @@ GROUP BY HAVING grouping(number, number % 2, number % 3) = 2 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; + +SELECT + number, + GROUPING(number, number % 2, number % 3) = 2 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2)) +ORDER BY number, gr; From 23dfe4941b52c88759849bf4b147d8ce41b45a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 20:31:45 +0200 Subject: [PATCH 016/150] Fix clang tidy error --- src/Core/SettingsFields.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index a27013ed6cf..ccf6162ab8e 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -186,7 +186,7 @@ namespace if (d != 0.0 && !std::isnormal(d)) throw Exception( ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. 
Got {}", d); - return static_cast(d *= 1000000); + return static_cast(d * 1000000); } } From 369c18ad7f2b869550b9203a21b617feb66625ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 13 May 2022 23:09:17 +0200 Subject: [PATCH 017/150] Fix clang tidy not error --- src/Core/SettingsFields.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index ccf6162ab8e..7b820401468 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -202,14 +202,14 @@ SettingFieldMilliseconds::SettingFieldTimespan(const Field & f) : SettingFieldTi } template <> -SettingFieldSeconds & SettingFieldSeconds::operator=(const Field & f) +SettingFieldTimespan & SettingFieldSeconds::operator=(const Field & f) { *this = Poco::Timespan{float64AsSecondsToTimespan(fieldToNumber(f))}; return *this; } template <> -SettingFieldMilliseconds & SettingFieldMilliseconds::operator=(const Field & f) +SettingFieldTimespan & SettingFieldMilliseconds::operator=(const Field & f) { *this = fieldToNumber(f); return *this; From 6fc7dfea809eabb81111306d6dcb4f2ce53e53a5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 13 May 2022 23:04:12 +0000 Subject: [PATCH 018/150] Support ordinary GROUP BY --- src/Functions/grouping.cpp | 23 ++++++++ src/Interpreters/ActionsVisitor.cpp | 56 +++++++++++++------ src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 21 +++++-- src/Interpreters/ExpressionAnalyzer.h | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 4 -- src/Parsers/ASTSelectQuery.h | 2 + src/Processors/QueryPlan/AggregatingStep.cpp | 20 ------- src/Storages/VirtualColumnUtils.cpp | 2 +- ...02293_grouping_function_group_by.reference | 20 +++++++ .../02293_grouping_function_group_by.sql | 18 ++++++ 11 files changed, 123 insertions(+), 49 deletions(-) create mode 100644 tests/queries/0_stateless/02293_grouping_function_group_by.reference create mode 100644 tests/queries/0_stateless/02293_grouping_function_group_by.sql diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index 1849bd0e9a5..eb63764947c 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -45,8 +45,31 @@ public: return std::make_shared(); } + ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); + auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); + + auto aggregation_keys_number = (*grouping_set_map_column)[0].get(); + + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto indexes = (*argument_keys_column)[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (index.get() < aggregation_keys_number ? 
1 : 0); + + result->insert(Field(value)); + } + return result; + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (arguments.size() == 2) + return executeSingleGroupingSet(arguments, input_rows_count); + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 40a5f055243..5bececb70ae 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -477,7 +477,8 @@ ActionsMatcher::Data::Data( bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_) + bool create_source_for_in_, + bool has_grouping_set_column_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -490,6 +491,7 @@ ActionsMatcher::Data::Data( , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) + , has_grouping_set_column(has_grouping_set_column_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) @@ -842,19 +844,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { ColumnWithTypeAndName column; column.name = "__grouping_set_map"; - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) + if (data.has_grouping_set_column) { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + } + else + { + column.type = std::make_shared(); + auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); data.addColumn(column); } @@ -875,11 +886,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addColumn(column); } - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set", "__grouping_set_map", arguments_column_name }, - column_name - ); + if (data.has_grouping_set_column) + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set", "__grouping_set_map", arguments_column_name }, + column_name + ); + } + else + { + data.addFunction( + FunctionFactory::instance().get("grouping", data.getContext()), + { "__grouping_set_map", 
arguments_column_name }, + column_name + ); + } return; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b7d2905ac73..3f7f6b5b127 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -97,6 +97,7 @@ public: bool no_makeset; bool only_consts; bool create_source_for_in; + bool has_grouping_set_column; size_t visit_depth; ScopeStack actions_stack; @@ -120,7 +121,8 @@ public: bool no_subqueries_, bool no_makeset_, bool only_consts_, - bool create_source_for_in_); + bool create_source_for_in_, + bool has_grouping_set_column_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1a2cb4ace1a..9c74693e6a2 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -333,8 +333,10 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. - if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1; + if (process_grouping_sets) aggregated_columns.emplace_back("__grouping_set", std::make_shared()); + need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets; for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { @@ -452,8 +454,12 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - // if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) - // aggregated_columns.emplace_back("__grouping_set_map", std::make_shared(aggregation_keys.size() + 1)); + if (!select_query->group_by_with_grouping_sets) + { + auto & list = aggregation_keys_indexes_list.emplace_back(); + for (size_t i = 0; i < aggregation_keys.size(); ++i) + list.push_back(i); + } if (group_asts.empty()) { @@ -598,7 +604,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ no_makeset_for_subqueries, false /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -620,7 +627,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, true /* no_makeset */, only_consts, - !isRemoteStorage() /* create_source_for_in */); + !isRemoteStorage() /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -643,7 +651,8 @@ void ExpressionAnalyzer::getRootActionsForHaving( no_makeset_for_subqueries, false /* no_makeset */, only_consts, - true /* create_source_for_in */); + true /* create_source_for_in */, + need_grouping_set_column); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1200091efef..5db4fda0fcf 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ 
b/src/Interpreters/ExpressionAnalyzer.h @@ -77,6 +77,8 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. TemporaryTablesMapping external_tables; + + bool need_grouping_set_column = false; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5f165f9d535..6bfadc66352 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -757,9 +756,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl() res.insert({nullptr, type, aggregate.column_name}); } - // if (analysis_result.use_grouping_set_key) - // res.insert({ nullptr, std::make_shared(query_analyzer->aggregationKeys().size() + 1), "__grouping_set_map" }); - return res; } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 704aeeeea7c..b3f53de3c99 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -89,6 +89,8 @@ public: bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool needGroupingSetColumn() const noexcept { return group_by_with_cube || group_by_with_rollup || group_by_with_grouping_sets; } + ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 9c2b5a44914..b830c7899bb 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -50,10 +50,6 @@ static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & pa for (auto & col : block) res.insert(std::move(col)); - // auto map_column = ColumnFixedString::create(keys_size + 1); - // map_column->resize(rows); - // res.insert({ColumnPtr(std::move(map_column)), std::make_shared(keys_size + 1), "__grouping_set_map"}); - return res; } @@ -243,22 +239,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); } - // { - // std::string grouping_map; - // grouping_map.reserve(params.keys_size + 1); - // std::unordered_set key_set(grouping_sets_params[set_counter].used_keys.begin(), grouping_sets_params[set_counter].used_keys.end()); - // for (auto key : params.keys) - // grouping_map += key_set.contains(key) ? 
'1' : '0'; - // grouping_map += '0'; - // auto nested_column = ColumnFixedString::create(params.keys_size + 1); - // nested_column->insertString(grouping_map); - // auto grouping_map_col = ColumnConst::create(ColumnPtr(std::move(nested_column)), 0); - // const auto * grouping_map_node = &dag->addColumn( - // {ColumnPtr(std::move(grouping_map_col)), std::make_shared(grouping_map.length()), "__grouping_set_map"}); - // grouping_map_node = &dag->materializeNode(*grouping_map_node); - // index.push_back(grouping_map_node); - // } - dag->getIndex().swap(index); auto expression = std::make_shared(dag, settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ef25612f63e..c1824206b60 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference new file mode 100644 index 00000000000..38578d6ad1d --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -0,0 +1,20 @@ +0 1 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +0 1 1 +1 1 1 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +6 1 1 +7 1 1 +8 1 1 +9 1 1 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql new file mode 100644 index 00000000000..5b12c34adac --- /dev/null +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -0,0 +1,18 @@ +SELECT + number, + grouping(number, number % 2, number % 3) = 6 +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; + +SELECT + number, + grouping(number), + GROUPING(number % 2) +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; From 644b7c01bd199def618a7259fa71d2dc505d40db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Sat, 14 May 2022 01:19:19 +0200 Subject: [PATCH 019/150] Lower the value of settings to match old behaviour Before the change to use floats, this settings were getting 0 as the parsed value, so keep the old value by setting to zero explicitly --- tests/queries/0_stateless/01287_max_execution_speed.sql | 4 ++-- .../0_stateless/01290_max_execution_speed_distributed.sql | 2 +- .../1_stateful/00156_max_execution_speed_sample_merge.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01287_max_execution_speed.sql b/tests/queries/0_stateless/01287_max_execution_speed.sql index 7dbeab2d635..6e6c71e445c 100644 --- a/tests/queries/0_stateless/01287_max_execution_speed.sql +++ b/tests/queries/0_stateless/01287_max_execution_speed.sql @@ -1,11 +1,11 @@ -- Tags: no-fasttest -SET min_execution_speed = 100000000000, timeout_before_checking_execution_speed = 0.1; +SET 
min_execution_speed = 100000000000, timeout_before_checking_execution_speed = 0; SELECT count() FROM system.numbers; -- { serverError 160 } SELECT 'Ok (1)'; SET min_execution_speed = 0; -SET min_execution_speed_bytes = 800000000000, timeout_before_checking_execution_speed = 0.1; +SET min_execution_speed_bytes = 800000000000, timeout_before_checking_execution_speed = 0; SELECT count() FROM system.numbers; -- { serverError 160 } SELECT 'Ok (2)'; SET min_execution_speed_bytes = 0; diff --git a/tests/queries/0_stateless/01290_max_execution_speed_distributed.sql b/tests/queries/0_stateless/01290_max_execution_speed_distributed.sql index 8dcac23550d..d0dc554f425 100644 --- a/tests/queries/0_stateless/01290_max_execution_speed_distributed.sql +++ b/tests/queries/0_stateless/01290_max_execution_speed_distributed.sql @@ -1,7 +1,7 @@ -- Tags: distributed SET max_execution_speed = 1000000; -SET timeout_before_checking_execution_speed = 0.001; +SET timeout_before_checking_execution_speed = 0; SET max_block_size = 100; SET log_queries=1; diff --git a/tests/queries/1_stateful/00156_max_execution_speed_sample_merge.sql b/tests/queries/1_stateful/00156_max_execution_speed_sample_merge.sql index 37e91296f14..e325c18200b 100644 --- a/tests/queries/1_stateful/00156_max_execution_speed_sample_merge.sql +++ b/tests/queries/1_stateful/00156_max_execution_speed_sample_merge.sql @@ -1,4 +1,4 @@ -SET max_execution_speed = 4000000, timeout_before_checking_execution_speed = 0.001; +SET max_execution_speed = 4000000, timeout_before_checking_execution_speed = 0; CREATE TEMPORARY TABLE times (t DateTime); From 4540cf8925f3327978fa35416b6cdc26c459485f Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 13:46:47 +0200 Subject: [PATCH 020/150] Style check fixes --- src/Disks/DiskObjectStorage.cpp | 3 +- src/Disks/IDisk.h | 2 +- src/Disks/S3/diskSettings.cpp | 5 ++ src/Disks/S3ObjectStorage.cpp | 54 +++++++++----------- src/Disks/S3ObjectStorage.h | 2 +- tests/integration/test_merge_tree_s3/test.py | 4 +- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 8f472c713b7..01f01fdeaa4 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -29,7 +29,8 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; extern const int MEMORY_LIMIT_EXCEEDED; - extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } static String revisionToString(UInt64 revision) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index cf8b1a09ce9..e4a0b84448c 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -290,7 +290,7 @@ public: virtual bool isReadOnly() const { return false; } - /// Check if disk is broken. Broken disks will have 0 space and connot be used. + /// Check if disk is broken. Broken disks will have 0 space and cannot be used. virtual bool isBroken() const { return false; } /// Invoked when Global Context is shutdown. 
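Editor's note: a minimal SQL sketch, not part of the patch itself, illustrating the behaviour change that the test edits in patch 019 ("Lower the value of settings to match old behaviour") compensate for. The claim about integer truncation on older releases is taken from that commit message, and the serverError 160 annotation mirrors the 01287_max_execution_speed test shown above; treat the comments as an assumption-based sketch rather than verified server behaviour.

-- Before this series, fractional second-valued settings were parsed as integers,
-- so 0.1 truncated to 0 and the execution-speed check started immediately.
SET timeout_before_checking_execution_speed = 0.1;
-- After the change to floating-point seconds, 0.1 is honoured as 100 ms, which would
-- delay the check; the tests therefore request the old behaviour explicitly:
SET timeout_before_checking_execution_speed = 0;
SET min_execution_speed = 100000000000;
SELECT count() FROM system.numbers; -- { serverError 160 } -- TOO_SLOW, reported without delay
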
diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index c4cd3253a21..579f160abd4 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { S3Settings::ReadWriteSettings rw_settings; diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 0a7bd45d546..e6c3d357265 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -35,10 +35,6 @@ namespace DB namespace ErrorCodes { extern const int S3_ERROR; - extern const int FILE_ALREADY_EXISTS; - extern const int UNKNOWN_FORMAT; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } namespace @@ -82,14 +78,10 @@ bool S3ObjectStorage::exists(const std::string & path) const if (!object_head.IsSuccess()) { if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT DOESNT {} EXISTS", path); return false; - } throwIfError(object_head); } - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT {} EXISTS", path); return true; } @@ -102,31 +94,31 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT std::optional) const { - ReadSettings disk_read_settings{read_settings}; - if (cache) - { - if (IFileCache::isReadOnly()) - disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; - disk_read_settings.remote_fs_cache = cache; - } + disk_read_settings.remote_fs_cache = cache; + } - auto settings_ptr = s3_settings.get(); + auto settings_ptr = s3_settings.get(); - auto s3_impl = std::make_unique( - client.get(), bucket, version_id, common_path_prefix, blobs_to_read, - settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); + auto s3_impl = std::make_unique( + client.get(), bucket, version_id, common_path_prefix, blobs_to_read, + settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); - if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); - } - else - { - auto buf = std::make_unique(std::move(s3_impl)); - return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); - } + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); + } + else + { + auto buf = std::make_unique(std::move(s3_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } } std::unique_ptr S3ObjectStorage::readObject( /// NOLINT @@ -135,8 +127,8 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::optional, std::optional) const { - auto settings_ptr = s3_settings.get(); - return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); + auto settings_ptr = s3_settings.get(); + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); } diff --git a/src/Disks/S3ObjectStorage.h 
b/src/Disks/S3ObjectStorage.h index 7632a643130..81595d4385d 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -56,7 +56,7 @@ public: , s3_settings(std::move(s3_settings_)) , version_id(std::move(version_id_)) {} - + bool exists(const std::string & path) const override; std::unique_ptr readObject( /// NOLINT diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index c7041c05403..38553f27ac1 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -229,7 +229,9 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): assert ( node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45) + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45 + ) @pytest.mark.parametrize("node_name", ["node"]) From 47834cbabea440880b87d787b5d21688ae1372d6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 14:06:09 +0200 Subject: [PATCH 021/150] Satisfy clang-tidy and style check --- src/Disks/DiskObjectStorage.cpp | 2 +- src/Disks/IObjectStorage.h | 11 +++++++++-- src/Disks/S3ObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.h | 11 +++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 01f01fdeaa4..d2384a82d13 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -1003,7 +1003,7 @@ void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfigur } } -void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT { auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); buffer->next(); diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index 6a66ffb622e..64ba6e75281 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -95,9 +95,16 @@ public: virtual void removeObjectsIfExist(const std::vector & paths) = 0; - virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + virtual void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) = 0; - virtual void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}); + virtual void copyObjectToAnotherObjectStorage( /// NOLINT + const std::string & object_from, + const std::string & object_to, + IObjectStorage & object_storage_to, + std::optional object_to_attributes = {}); virtual ~IObjectStorage() = default; diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index e6c3d357265..94a9a42807a 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -132,7 +132,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT } -std::unique_ptr S3ObjectStorage::writeObject( +std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, std::optional attributes, FinalizeCallback && finalize_callback, diff --git a/src/Disks/S3ObjectStorage.h 
b/src/Disks/S3ObjectStorage.h index 81595d4385d..fcc99ae1d91 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -92,9 +92,16 @@ public: ObjectMetadata getObjectMetadata(const std::string & path) const override; - void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) override; + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; - void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}) override; + void copyObjectToAnotherObjectStorage( /// NOLINT + const std::string & object_from, + const std::string & object_to, + IObjectStorage & object_storage_to, + std::optional object_to_attributes = {}) override; void shutdown() override; From 32e0ca22be26623cf1c698c1c0b9152b0a2454be Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 14:49:01 +0200 Subject: [PATCH 022/150] Format queries better --- ...emote_filesystem_cache_on_insert.reference | 48 +++++++++++++++-- ...2241_remote_filesystem_cache_on_insert.sql | 51 +++++++++++++++++-- 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index b2269c16264..783227d5587 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -4,13 +4,41 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 SELECT count() FROM system.filesystem_cache; 0 INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE 
endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 @@ -33,7 +61,21 @@ SELECT count() size FROM system.filesystem_cache; 7 SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index c3ab1de3693..31d4ca99abb 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -9,13 +9,43 @@ CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SET SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, 
remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; @@ -33,7 +63,22 @@ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; From 0e0b535b257e328a983792e0735fae4908f7b82d Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 16:54:39 +0200 Subject: [PATCH 023/150] Fix failing test --- src/Disks/DiskObjectStorage.cpp | 13 ------------- src/Disks/DiskObjectStorage.h | 5 +++++ src/Disks/S3/registerDiskS3.cpp | 10 +++++++++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index d2384a82d13..2f0dad9e8b8 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -349,7 +349,6 @@ size_t DiskObjectStorage::getFileSize(const String & path) const void DiskObjectStorage::moveFile(const String & from_path, const String & to_path, bool should_send_metadata) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "MOVE FILE"); if (exists(to_path)) throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); @@ -375,7 +374,6 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REPLACE FILE"); if (exists(to_path)) { const String tmp_path = to_path + ".old"; @@ -389,7 +387,6 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ void 
DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Remove shared file"); std::vector paths_to_remove; removeMetadata(path, paths_to_remove); @@ -399,7 +396,6 @@ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metada void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Read from remote FS"); object_storage->removeObjects(paths); } @@ -446,7 +442,6 @@ bool DiskObjectStorage::checkUniqueId(const String & id) const void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "HARDLINK FILE"); readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); if (should_send_metadata && !dst_path.starts_with("shadow/")) @@ -472,7 +467,6 @@ void DiskObjectStorage::createHardLink(const String & src_path, const String & d void DiskObjectStorage::setReadOnly(const String & path) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "set readonly"); /// We should store read only flag inside metadata file (instead of using FS flag), /// because we modify metadata file when create hard-links from it. readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); @@ -729,19 +723,12 @@ void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfig { if (send_metadata) { - LOG_DEBUG(log, "START RESTORING METADATA"); metadata_helper->restore(config, config_prefix, context); if (metadata_helper->readSchemaVersion(object_storage.get(), remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) - { - LOG_DEBUG(log, "DONE READING"); metadata_helper->migrateToRestorableSchema(); - LOG_DEBUG(log, "MIGRATION FINISHED"); - } - LOG_DEBUG(log, "SEARCHING LAST REVISION"); metadata_helper->findLastRevision(); - LOG_DEBUG(log, "DONE RESTORING METADATA"); } } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 83a3ba6b508..d04bb3d0d04 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -56,6 +56,11 @@ public: void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + std::string getCacheBasePath() const override + { + return object_storage->getCacheBasePath(); + } + /// Methods for working with metadata. For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads /// (file actually rewritten on disk). So additional RW lock is required for diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index b344375f05b..de1194d6daf 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -37,7 +37,15 @@ namespace void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); - file->write("test", 4); + try + { + file->write("test", 4); + } + catch (...) 
+ { + file->finalize(); + throw; + } } void checkReadAccess(const String & disk_name, IDisk & disk) From 61014dbde649048bbc3fe3b0c1cbd7f43cc58b7d Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 18:41:54 +0200 Subject: [PATCH 024/150] Simlify strange test --- src/Disks/DiskObjectStorage.cpp | 3 +-- src/Disks/DiskObjectStorage.h | 2 +- src/Disks/IObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.cpp | 5 ++--- tests/integration/test_merge_tree_s3_restore/test.py | 7 ++++--- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 2f0dad9e8b8..b2d2bf23652 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -14,7 +14,6 @@ #include #include #include -#include namespace DB @@ -1114,7 +1113,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec } -void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const { for (const auto & key : keys) { diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index d04bb3d0d04..a67cc78b82b 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -315,7 +315,7 @@ public: void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys); + void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); std::atomic revision_counter = 0; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index 44b9430172b..538cc702791 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -35,7 +35,7 @@ void IObjectStorage::removeFromCache(const std::string & path) } } -void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) // NOLINT { if (&object_storage_to == this) copyObject(object_from, object_to, object_to_attributes); diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 94a9a42807a..3f26937a29b 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -287,7 +286,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons return result; } -void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string 
& object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) // NOLINT { /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) @@ -399,7 +398,7 @@ void S3ObjectStorage::copyObjectMultipartImpl(const String & src_bucket, const S } } -void S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes) +void S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes) // NOLINT { auto head = requestObjectHeadData(bucket, object_from).GetResult(); if (head.GetContentLength() >= static_cast(5UL * 1024 * 1024 * 1024)) diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index 6ae63db52ef..d9a58e5e5b1 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -88,6 +88,8 @@ def generate_values(date_str, count, sign=1): def create_table( node, table_name, attach=False, replicated=False, db_atomic=False, uuid="" ): + node.query("DROP DATABASE IF EXISTS s3") + node.query( "CREATE DATABASE IF NOT EXISTS s3 ENGINE = {engine}".format( engine="Atomic" if db_atomic else "Ordinary" @@ -501,8 +503,8 @@ def test_restore_mutations(cluster, db_atomic): ) == "({})".format(4096) -@pytest.mark.parametrize("db_atomic", [False, True]) -def test_migrate_to_restorable_schema(cluster, db_atomic): +def test_migrate_to_restorable_schema(cluster): + db_atomic = True node = cluster.instances["node_not_restorable"] create_table(node, "test", db_atomic=db_atomic) @@ -556,7 +558,6 @@ def test_migrate_to_restorable_schema(cluster, db_atomic): "SELECT sum(id) FROM s3.test FORMAT Values" ) == "({})".format(0) - @pytest.mark.parametrize("replicated", [False, True]) @pytest.mark.parametrize("db_atomic", [False, True]) def test_restore_to_detached(cluster, replicated, db_atomic): From 4e7e304e3ded79d064b82bcd7d5553caad37012f Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 18:53:30 +0200 Subject: [PATCH 025/150] Fix BLACK --- tests/integration/test_merge_tree_s3_restore/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index d9a58e5e5b1..e6ca4a78c25 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -558,6 +558,7 @@ def test_migrate_to_restorable_schema(cluster): "SELECT sum(id) FROM s3.test FORMAT Values" ) == "({})".format(0) + @pytest.mark.parametrize("replicated", [False, True]) @pytest.mark.parametrize("db_atomic", [False, True]) def test_restore_to_detached(cluster, replicated, db_atomic): From 4f1d4dde2236dcd1f301ac64c477af6cbba693fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 00:16:00 +0200 Subject: [PATCH 026/150] Disable s3 logging by default --- src/Core/Settings.h | 1 + src/Disks/S3/diskSettings.cpp | 3 +- src/IO/S3/PocoHTTPClient.cpp | 52 +++++++++++++++++++++-------------- src/IO/S3/PocoHTTPClient.h | 6 ++-- src/IO/S3Common.cpp | 23 +++++++++++----- src/IO/S3Common.h | 4 ++- src/Storages/StorageS3.cpp | 3 +- 7 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2e6d657698c..505a519aeca 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -86,6 +86,7 @@ static constexpr 
UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ + M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Make sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index 579f160abd4..4ff322b5b54 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -95,7 +95,8 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura { S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), - context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); + context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects, + context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 9aacbda3fbf..aff1b9db6fd 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -49,10 +49,12 @@ namespace DB::S3 PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const String & force_region_, const RemoteHostFilter & remote_host_filter_, - unsigned int s3_max_redirects_) + unsigned int s3_max_redirects_, + bool enable_s3_requests_logging_) : force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) + , enable_s3_requests_logging(enable_s3_requests_logging_) { } @@ -87,16 +89,17 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } -PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration) - : per_request_configuration(clientConfiguration.perRequestConfiguration) - , error_report(clientConfiguration.error_report) +PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) + : per_request_configuration(client_configuration.perRequestConfiguration) + , error_report(client_configuration.error_report) , timeouts(ConnectionTimeouts( - Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout. - Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout. - Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000) /// receive timeout. + Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. + Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. + Poco::Timespan(client_configuration.requestTimeoutMs * 1000) /// receive timeout. 
)) - , remote_host_filter(clientConfiguration.remote_host_filter) - , s3_max_redirects(clientConfiguration.s3_max_redirects) + , remote_host_filter(client_configuration.remote_host_filter) + , s3_max_redirects(client_configuration.s3_max_redirects) + , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) { } @@ -119,7 +122,8 @@ void PocoHTTPClient::makeRequestInternal( Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); - LOG_TEST(log, "Make request to: {}", uri); + if (enable_s3_requests_logging) + LOG_TEST(log, "Make request to: {}", uri); enum class S3MetricType { @@ -251,7 +255,8 @@ void PocoHTTPClient::makeRequestInternal( if (request.GetContentBody()) { - LOG_TEST(log, "Writing request body."); + if (enable_s3_requests_logging) + LOG_TEST(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. { @@ -259,24 +264,28 @@ void PocoHTTPClient::makeRequestInternal( request.GetContentBody()->seekg(0); } auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); - LOG_TEST(log, "Written {} bytes to request body", size); + if (enable_s3_requests_logging) + LOG_TEST(log, "Written {} bytes to request body", size); } - LOG_TEST(log, "Receiving response..."); + if (enable_s3_requests_logging) + LOG_TEST(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); watch.stop(); ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); - LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); + if (enable_s3_requests_logging) + LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { auto location = poco_response.get("location"); remote_host_filter.checkURL(Poco::URI(location)); uri = location; - LOG_TEST(log, "Redirecting request to new location: {}", location); + if (enable_s3_requests_logging) + LOG_TEST(log, "Redirecting request to new location: {}", location); ProfileEvents::increment(select_metric(S3MetricType::Redirects)); @@ -286,13 +295,16 @@ void PocoHTTPClient::makeRequestInternal( response->SetResponseCode(static_cast(status_code)); response->SetContentType(poco_response.getContentType()); - WriteBufferFromOwnString headers_ss; - for (const auto & [header_name, header_value] : poco_response) + if (enable_s3_requests_logging) { - response->AddHeader(header_name, header_value); - headers_ss << header_name << ": " << header_value << "; "; + WriteBufferFromOwnString headers_ss; + for (const auto & [header_name, header_value] : poco_response) + { + response->AddHeader(header_name, header_value); + headers_ss << header_name << ": " << header_value << "; "; + } + LOG_TEST(log, "Received headers: {}", headers_ss.str()); } - LOG_TEST(log, "Received headers: {}", headers_ss.str()); if (status_code == 429 || status_code == 503) { // API throttling diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index defd029f05a..3897097b196 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -32,13 +32,14 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; + bool enable_s3_requests_logging; void updateSchemeAndRegion(); std::function error_report; 
private: - PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_); + PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. friend ClientFactory; @@ -79,7 +80,7 @@ private: class PocoHTTPClient : public Aws::Http::HttpClient { public: - explicit PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration); + explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -99,6 +100,7 @@ private: ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; + bool enable_s3_requests_logging; }; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 3732b662ea2..95e67d5d6d6 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -61,7 +61,8 @@ const std::pair & convertLogLevel(Aws::U class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface { public: - AWSLogger() + explicit AWSLogger(bool enable_s3_requests_logging_) + :enable_s3_requests_logging(enable_s3_requests_logging_) { for (auto [tag, name] : S3_LOGGER_TAG_NAMES) tag_loggers[tag] = &Poco::Logger::get(name); @@ -71,7 +72,13 @@ public: ~AWSLogger() final = default; - Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; } + Aws::Utils::Logging::LogLevel GetLogLevel() const final + { + if (enable_s3_requests_logging) + return Aws::Utils::Logging::LogLevel::Trace; + else + return Aws::Utils::Logging::LogLevel::Info; + } void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final // NOLINT { @@ -100,6 +107,7 @@ public: private: Poco::Logger * default_logger; + bool enable_s3_requests_logging; std::unordered_map tag_loggers; }; @@ -535,7 +543,7 @@ public: /// AWS API tries credentials providers one by one. Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be /// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers. { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); AddProvider(std::make_shared(aws_client_configuration)); } @@ -572,7 +580,7 @@ public: } else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true") { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); /// See MakeDefaultHttpResourceClientConfiguration(). 
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside @@ -692,7 +700,7 @@ namespace S3 { aws_options = Aws::SDKOptions{}; Aws::InitAPI(aws_options); - Aws::Utils::Logging::InitializeAWSLogging(std::make_shared()); + Aws::Utils::Logging::InitializeAWSLogging(std::make_shared(false)); Aws::Http::SetHttpClientFactory(std::make_shared()); } @@ -756,9 +764,10 @@ namespace S3 PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT const String & force_region, const RemoteHostFilter & remote_host_filter, - unsigned int s3_max_redirects) + unsigned int s3_max_redirects, + bool enable_s3_requestrs_logging) { - return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects); + return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requestrs_logging); } URI::URI(const Poco::URI & uri_) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 01f77cff820..98471f5b81f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -44,12 +44,14 @@ public: PocoHTTPClientConfiguration createClientConfiguration( const String & force_region, const RemoteHostFilter & remote_host_filter, - unsigned int s3_max_redirects); + unsigned int s3_max_redirects, + bool enable_s3_requestrs_logging); private: ClientFactory(); Aws::SDKOptions aws_options; + std::atomic s3_requests_logging_enabled; }; /** diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2ab553ad450..c981a055ab6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -878,7 +878,8 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, - ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects); + ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects, + ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); client_configuration.endpointOverride = upd.uri.endpoint; client_configuration.maxConnections = upd.rw_settings.max_connections; From e5b395e0542ad723a2ead3f0702f0f6f4203dbdc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 16 May 2022 17:33:38 +0000 Subject: [PATCH 027/150] Support ROLLUP and CUBE in GROUPING function --- src/Functions/grouping.cpp | 65 ++++++++++- src/Interpreters/ActionsVisitor.cpp | 63 ++++++---- src/Interpreters/ActionsVisitor.h | 13 ++- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++- src/Interpreters/ExpressionAnalyzer.h | 3 +- src/Processors/QueryPlan/AggregatingStep.cpp | 11 ++ src/Processors/QueryPlan/AggregatingStep.h | 2 + src/Processors/QueryPlan/CubeStep.cpp | 3 +- src/Processors/QueryPlan/RollupStep.cpp | 3 +- src/Processors/Transforms/CubeTransform.cpp | 5 +- src/Processors/Transforms/CubeTransform.h | 1 + src/Processors/Transforms/RollupTransform.cpp | 5 +- src/Processors/Transforms/RollupTransform.h | 1 + src/Storages/VirtualColumnUtils.cpp | 2 +- ...02293_grouping_function_group_by.reference | 108 ++++++++++++++++++ .../02293_grouping_function_group_by.sql | 50 ++++++++ 16 files changed, 315 insertions(+), 39 deletions(-) diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp index eb63764947c..c6c6061307d 100644 --- a/src/Functions/grouping.cpp +++ b/src/Functions/grouping.cpp @@ -1,8 +1,11 @@ #include -#include "Columns/ColumnsNumber.h" +#include #include #include +#include #include 
+#include +#include #include #include #include @@ -45,7 +48,7 @@ public: return std::make_shared(); } - ColumnPtr executeSingleGroupingSet(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeOrdinaryGroupBy(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); @@ -65,16 +68,70 @@ public: return result; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeRollup( + const ColumnUInt64 * grouping_set_column, + const ColumnConst & argument_keys_column, + UInt64 keys, + size_t input_rows_count) const + { + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + + auto indexes = argument_keys_column[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (index.get() < keys - set_index ? 1 : 0); + + result->insert(Field(value)); + } + return result; + } + + ColumnPtr executeCube( + const ColumnUInt64 * grouping_set_column, + const ColumnConst & argument_keys_column, + UInt64 keys, + size_t input_rows_count) const + { + static constexpr auto ONE = static_cast(1); + auto result = std::make_shared()->createColumn(); + auto mask_base = (ONE << keys) - 1; + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + auto mask = mask_base - set_index; + auto indexes = argument_keys_column[i].get(); + UInt64 value = 0; + for (auto index : indexes) + value = (value << 1) + (mask & (ONE << (keys - index.get() - 1)) ? 
1 : 0); + + result->insert(Field(value)); + } + return result; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t input_rows_count) const override { if (arguments.size() == 2) - return executeSingleGroupingSet(arguments, input_rows_count); + return executeOrdinaryGroupBy(arguments, input_rows_count); auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); auto masks = (*grouping_set_map_column)[0].get(); + auto grouping_set_map_elem_type = applyVisitor(FieldToDataType(), masks[0]); + if (!isString(grouping_set_map_elem_type)) + { + bool is_rollup = masks[0].get() == 0; + auto keys = masks[1].get(); + if (is_rollup) + return executeRollup(grouping_set_column, *argument_keys_column, keys, input_rows_count); + else + return executeCube(grouping_set_column, *argument_keys_column, keys, input_rows_count); + } auto result = std::make_shared()->createColumn(); for (size_t i = 0; i < input_rows_count; ++i) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 5bececb70ae..70493e5fefc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -62,6 +62,7 @@ namespace ErrorCodes extern const int INCORRECT_ELEMENT_OF_SET; extern const int BAD_ARGUMENTS; extern const int DUPLICATE_COLUMN; + extern const int LOGICAL_ERROR; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -478,7 +479,7 @@ ActionsMatcher::Data::Data( bool no_makeset_, bool only_consts_, bool create_source_for_in_, - bool has_grouping_set_column_) + GroupByKind group_by_kind_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -491,7 +492,7 @@ ActionsMatcher::Data::Data( , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) - , has_grouping_set_column(has_grouping_set_column_) + , group_by_kind(group_by_kind_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) @@ -844,27 +845,47 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { ColumnWithTypeAndName column; column.name = "__grouping_set_map"; - if (data.has_grouping_set_column) + switch (data.group_by_kind) { - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) + case GroupByKind::GROUPING_SETS: { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); + size_t map_size = data.aggregation_keys.size() + 1; + column.type = std::make_shared(std::make_shared(map_size)); + Array maps_per_set; + for (auto & grouping_set : data.grouping_set_keys) + { + std::string key_map(map_size, '0'); + for (auto index : grouping_set) + key_map[index] = '1'; + maps_per_set.push_back(key_map); + } + auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); + grouping_set_map_column->insert(maps_per_set); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - 
grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - } - else - { - column.type = std::make_shared(); - auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + case GroupByKind::ROLLUP: + case GroupByKind::CUBE: + { + column.type = std::make_shared(std::make_shared()); + auto grouping_set_map_column = ColumnArray::create(ColumnUInt64::create()); + Array kind_and_keys_size; + kind_and_keys_size.push_back(data.group_by_kind == GroupByKind::ROLLUP ? 0 : 1); + kind_and_keys_size.push_back(data.aggregation_keys.size()); + grouping_set_map_column->insert(kind_and_keys_size); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; + } + case GroupByKind::ORDINARY: + { + column.type = std::make_shared(); + auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); + column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); } data.addColumn(column); @@ -886,7 +907,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addColumn(column); } - if (data.has_grouping_set_column) + if (data.group_by_kind != GroupByKind::ORDINARY) { data.addFunction( FunctionFactory::instance().get("grouping", data.getContext()), diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 3f7f6b5b127..5fd228ba836 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -78,6 +78,15 @@ class ASTIdentifier; class ASTFunction; class ASTLiteral; +enum class GroupByKind +{ + NONE, + ORDINARY, + ROLLUP, + CUBE, + GROUPING_SETS, +}; + /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too. class ActionsMatcher { @@ -97,7 +106,7 @@ public: bool no_makeset; bool only_consts; bool create_source_for_in; - bool has_grouping_set_column; + GroupByKind group_by_kind; size_t visit_depth; ScopeStack actions_stack; @@ -122,7 +131,7 @@ public: bool no_makeset_, bool only_consts_, bool create_source_for_in_, - bool has_grouping_set_column_); + GroupByKind group_by_kind_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9c74693e6a2..f7f67c28f93 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -331,12 +331,19 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) NameToIndexMap unique_keys; ASTs & group_asts = group_by_ast->children; + if (select_query->group_by_with_rollup) + group_by_kind = GroupByKind::ROLLUP; + else if (select_query->group_by_with_cube) + group_by_kind = GroupByKind::CUBE; + else if (select_query->group_by_with_grouping_sets && group_asts.size() > 1) + group_by_kind = GroupByKind::GROUPING_SETS; + else + group_by_kind = GroupByKind::ORDINARY; + /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. 
- bool process_grouping_sets = select_query->group_by_with_grouping_sets && group_asts.size() > 1; - if (process_grouping_sets) + if (group_by_kind != GroupByKind::ORDINARY) aggregated_columns.emplace_back("__grouping_set", std::make_shared()); - need_grouping_set_column = select_query->group_by_with_rollup || select_query->group_by_with_cube || process_grouping_sets; for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { @@ -605,7 +612,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -628,7 +635,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -652,7 +659,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - need_grouping_set_column); + group_by_kind); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 5db4fda0fcf..fb28c08ad23 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -78,7 +79,7 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. TemporaryTablesMapping external_tables; - bool need_grouping_set_column = false; + GroupByKind group_by_kind = GroupByKind::NONE; }; diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index b830c7899bb..0028088d03f 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -35,6 +35,17 @@ static ITransformingStep::Traits getTraits() }; } +Block appendGroupingSetColumn(Block header) +{ + Block res; + res.insert({std::make_shared(), "__grouping_set"}); + + for (auto & col : header) + res.insert(std::move(col)); + + return res; +} + static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) { if (params.empty()) diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index b933daaa474..4dd3d956350 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -25,6 +25,8 @@ struct GroupingSetsParams using GroupingSetsParamsList = std::vector; +Block appendGroupingSetColumn(Block header); + /// Aggregation. See AggregatingTransform. 
class AggregatingStep : public ITransformingStep { diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 23c5115ec68..43a6491157a 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -22,7 +23,7 @@ static ITransformingStep::Traits getTraits() } CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, params_->getHeader(), getTraits()) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) { /// Aggregation keys are distinct diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index acaeb2bc9a7..2961ef5ddbd 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -22,7 +23,7 @@ static ITransformingStep::Traits getTraits() } RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, params_->getHeader(), getTraits()) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) { /// Aggregation keys are distinct diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 456eccc732f..f185e7565ea 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -9,7 +10,7 @@ namespace ErrorCodes } CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) , keys(params->params.keys) { @@ -74,6 +75,8 @@ Chunk CubeTransform::generate() } finalizeChunk(gen_chunk); + if (!gen_chunk.empty()) + gen_chunk.addColumn(0, ColumnUInt64::create(gen_chunk.getNumRows(), grouping_set++)); return gen_chunk; } diff --git a/src/Processors/Transforms/CubeTransform.h b/src/Processors/Transforms/CubeTransform.h index 6d0e2338174..b6f60af6aca 100644 --- a/src/Processors/Transforms/CubeTransform.h +++ b/src/Processors/Transforms/CubeTransform.h @@ -28,6 +28,7 @@ private: Columns current_zero_columns; UInt64 mask = 0; + UInt64 grouping_set = 0; Chunk merge(Chunks && chunks, bool final); }; diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index fb51b5f6b45..2355e35f8fd 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -1,11 +1,12 @@ #include #include +#include namespace DB { RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), params_->getHeader()) + : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) , keys(params->params.keys) { @@ -57,6 +58,8 @@ Chunk RollupTransform::generate() } finalizeChunk(gen_chunk); + if (!gen_chunk.empty()) + gen_chunk.addColumn(0, ColumnUInt64::create(gen_chunk.getNumRows(), set_counter++)); return gen_chunk; } diff --git a/src/Processors/Transforms/RollupTransform.h 
b/src/Processors/Transforms/RollupTransform.h index fd435740a63..e60c7e12de1 100644 --- a/src/Processors/Transforms/RollupTransform.h +++ b/src/Processors/Transforms/RollupTransform.h @@ -23,6 +23,7 @@ private: Chunks consumed_chunks; Chunk rollup_chunk; size_t last_removed_key = 0; + size_t set_counter = 0; Chunk merge(Chunks && chunks, bool final); }; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index c1824206b60..dd6c30e3c79 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, false); + context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, GroupByKind::NONE); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 38578d6ad1d..0285611b9fa 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -18,3 +18,111 @@ 7 1 1 8 1 1 9 1 1 +0 0 +0 4 +0 6 +1 4 +1 6 +2 4 +2 6 +3 4 +3 6 +4 4 +4 6 +5 4 +5 6 +6 4 +6 6 +7 4 +7 6 +8 4 +8 6 +9 4 +9 6 +0 0 +0 4 +0 6 +1 4 +1 6 +2 4 +2 6 +3 4 +3 6 +4 4 +4 6 +5 4 +5 6 +6 4 +6 6 +7 4 +7 6 +8 4 +8 6 +9 4 +9 6 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 +0 5 +0 6 +1 5 +1 6 +2 5 +2 6 +3 5 +3 6 +4 5 +4 6 +5 5 +5 6 +6 5 +6 6 +7 5 +7 6 +8 5 +8 6 +9 5 +9 6 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index 5b12c34adac..1b0fcdb9289 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -16,3 +16,53 @@ GROUP BY number, number % 2 ORDER BY number; + +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH ROLLUP +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2, number % 3) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH CUBE +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) + 3 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +HAVING grouping(number) != 0 +ORDER BY + number, gr; From ceb7249916c9dfbca971b7367a9504bfa86bd6e5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 19:57:48 +0200 Subject: [PATCH 028/150] Fix stupid buge --- src/IO/S3/PocoHTTPClient.cpp | 5 
+++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index d689d6b279c..cb8c91990a0 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -311,6 +311,11 @@ void PocoHTTPClient::makeRequestInternal( } LOG_TEST(log, "Received headers: {}", headers_ss.str()); } + else + { + for (const auto & [header_name, header_value] : poco_response) + response->AddHeader(header_name, header_value); + } if (status_code == 429 || status_code == 503) { // API throttling From 466a02ba8fab99b308523a30bdb5de8057528f67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 21:31:40 +0200 Subject: [PATCH 029/150] Follow HDFS --- src/Disks/DiskObjectStorage.cpp | 74 +++++++++++++- src/Disks/DiskObjectStorage.h | 13 +-- src/Disks/HDFSObjectStorage.cpp | 164 ++++++++++++++++++++++++++++++++ src/Disks/HDFSObjectStorage.h | 120 +++++++++++++++++++++++ src/Disks/IDisk.h | 9 +- src/Disks/IDiskObjectStorage.h | 8 ++ src/Disks/IObjectStorage.h | 2 + src/Disks/S3/diskSettings.cpp | 1 - src/Disks/S3/registerDiskS3.cpp | 8 +- src/Disks/S3ObjectStorage.cpp | 1 + src/Disks/S3ObjectStorage.h | 6 +- src/Disks/WriteMode.h | 15 +++ 12 files changed, 394 insertions(+), 27 deletions(-) create mode 100644 src/Disks/HDFSObjectStorage.cpp create mode 100644 src/Disks/HDFSObjectStorage.h create mode 100644 src/Disks/IDiskObjectStorage.h create mode 100644 src/Disks/WriteMode.h diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index b2d2bf23652..b0679051eec 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -37,6 +37,74 @@ static String revisionToString(UInt64 revision) return std::bitset<64>(revision).to_string(); } +namespace +{ + +/// Runs tasks asynchronously using thread pool. +class AsyncThreadPoolExecutor : public Executor +{ +public: + AsyncThreadPoolExecutor(const String & name_, int thread_pool_size) + : name(name_) + , pool(ThreadPool(thread_pool_size)) {} + + std::future execute(std::function task) override + { + auto promise = std::make_shared>(); + pool.scheduleOrThrowOnError( + [promise, task]() + { + try + { + task(); + promise->set_value(); + } + catch (...) + { + tryLogCurrentException("Failed to run async task"); + + try + { + promise->set_exception(std::current_exception()); + } + catch (...) 
{} + } + }); + + return promise->get_future(); + } + + void setMaxThreads(size_t threads) + { + pool.setMaxThreads(threads); + } + +private: + String name; + ThreadPool pool; +}; + +} + +DiskObjectStorage::DiskObjectStorage( + const String & name_, + const String & remote_fs_root_path_, + const String & log_name, + DiskPtr metadata_disk_, + ObjectStoragePtr && object_storage_, + DiskType disk_type_, + bool send_metadata_, + uint64_t thread_pool_size) + : IDisk(std::make_unique(log_name, thread_pool_size)) + , name(name_) + , remote_fs_root_path(remote_fs_root_path_) + , log (&Poco::Logger::get(log_name)) + , metadata_disk(metadata_disk_) + , disk_type(disk_type_) + , object_storage(std::move(object_storage_)) + , send_metadata(send_metadata_) + , metadata_helper(std::make_unique(this, ReadSettings{})) +{} DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { @@ -715,7 +783,11 @@ std::unique_ptr DiskObjectStorage::writeFile( void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) { - object_storage->applyNewSettings(config, "storage_configuration.disks." + name, context_); + const auto config_prefix = "storage_configuration.disks." + name; + object_storage->applyNewSettings(config, config_prefix, context_); + + if (AsyncThreadPoolExecutor * exec = dynamic_cast(&getExecutor())) + exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16)); } void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index a67cc78b82b..f1687fe19b6 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -14,6 +14,7 @@ namespace DB class DiskObjectStorageMetadataHelper; + class DiskObjectStorage : public IDisk { @@ -28,16 +29,8 @@ public: DiskPtr metadata_disk_, ObjectStoragePtr && object_storage_, DiskType disk_type_, - bool send_metadata_) - : name(name_) - , remote_fs_root_path(remote_fs_root_path_) - , log (&Poco::Logger::get(log_name)) - , metadata_disk(metadata_disk_) - , disk_type(disk_type_) - , object_storage(std::move(object_storage_)) - , send_metadata(send_metadata_) - , metadata_helper(std::make_unique(this, ReadSettings{})) - {} + bool send_metadata_, + uint64_t thread_pool_size); DiskType getType() const override { return disk_type; } diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp new file mode 100644 index 00000000000..646ec6c8bd4 --- /dev/null +++ b/src/Disks/HDFSObjectStorage.cpp @@ -0,0 +1,164 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#if USE_HDFS + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; +} + +void HDFSObjectStorage::shutdown() +{ +} + +void HDFSObjectStorage::startup() +{ +} + +bool HDFSObjectStorage::exists(const std::string & hdfs_uri) const +{ + const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); + const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); + return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); + +} + +std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + 
std::optional) const +{ + auto buf = std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); + + return std::make_unique(std::move(buf), settings->min_bytes_for_seek); +} + +std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto hdfs_impl = std::make_unique(config, common_path_prefix, common_path_prefix, blobs_to_read, read_settings); + auto buf = std::make_unique(std::move(hdfs_impl)); + return std::make_unique(std::move(buf), settings->min_bytes_for_seek); +} + + /// Open the file for write and return WriteBufferFromFileBase object. +std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings &) +{ + if (attributes.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + + /// Single O_WRONLY in libhdfs adds O_TRUNC + auto hdfs_buffer = std::make_unique(path, + config, settings->replication, buf_size, + mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); + + return std::make_unique(std::move(hdfs_buffer), std::move(finalize_callback), path); +} + + +void HDFSObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + const size_t begin_of_path = path.find('/', path.find("//") + 2); + int32_t num_entries; + auto * files_list = hdfsListDirectory(hdfs_fs.get(), path.substr(begin_of_path).c_str(), &num_entries); + if (num_entries == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + + for (int32_t i = 0; i < num_entries; ++i) + children.emplace_back(files_list[i].mName, files_list[i].mSize); +} + +/// Remove file. Throws exception if file doesn't exists or it's a directory. 
+void HDFSObjectStorage::removeObject(const std::string & path) +{ + const size_t begin_of_path = path.find('/', path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + +} + +void HDFSObjectStorage::removeObjects(const std::vector & paths) +{ + for (const auto & hdfs_path : paths) + { + const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + } +} + +void HDFSObjectStorage::removeObjectIfExists(const std::string & path) +{ + if (exists(path)) + removeObject(path); +} + +void HDFSObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + for (const auto & hdfs_path : paths) + { + if (!exists(hdfs_path)) + continue; + + const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + } +} + +ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const +{ + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); +} + +void HDFSObjectStorage::copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes) +{ + if (object_to_attributes.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + + auto in = readObject(object_from); + auto out = writeObject(object_to); + copyData(*in, *out); + out->finalize(); +} + + +} + +#endif diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/HDFSObjectStorage.h new file mode 100644 index 00000000000..397741d2c4a --- /dev/null +++ b/src/Disks/HDFSObjectStorage.h @@ -0,0 +1,120 @@ +#pragma once +#include + + +#if USE_HDFS + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct HDFSObjectStorageSettings +{ + + HDFSObjectStorageSettings() = default; + + size_t min_bytes_for_seek; + int objects_chunk_size_to_delete; + int replication; + + HDFSObjectStorageSettings( + int min_bytes_for_seek_, + int objects_chunk_size_to_delete_, + int replication_) + : min_bytes_for_seek(min_bytes_for_seek_) + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + , replication(replication_) + {} +}; + + +class HDFSObjectStorage : public IObjectStorage +{ +public: + + using SettingsPtr = std::unique_ptr; + + HDFSObjectStorage( + FileCachePtr && cache_, + const String & hdfs_root_path_, + SettingsPtr settings_, + const Poco::Util::AbstractConfiguration & config_) + : IObjectStorage(std::move(cache_)) + , config(config_) + , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) + , hdfs_fs(createHDFSFS(hdfs_builder.get())) + , settings(std::move(settings_)) + {} + + bool exists(const std::string & hdfs_uri) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + 
+ std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. + void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; + + void shutdown() override; + + void startup() override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + String getObjectsNamespace() const override { return bucket; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + +private: + const Poco::Util::AbstractConfiguration & config; + + HDFSBuilderWrapper hdfs_builder; + HDFSFSPtr hdfs_fs; + + SettingsPtr settings; + + +}; + +} + +#endif diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index e4a0b84448c..51e380df6df 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -49,14 +50,6 @@ class ReadBufferFromFileBase; class WriteBufferFromFileBase; class MMappedFileCache; -/** - * Mode of opening a file for write. - */ -enum class WriteMode -{ - Rewrite, - Append -}; /** * Provide interface for reservation. diff --git a/src/Disks/IDiskObjectStorage.h b/src/Disks/IDiskObjectStorage.h new file mode 100644 index 00000000000..90794301e54 --- /dev/null +++ b/src/Disks/IDiskObjectStorage.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +namespace DB +{ + +} diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index 64ba6e75281..e5eb08f145d 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -80,6 +81,7 @@ public: /// Open the file for write and return WriteBufferFromFileBase object. 
virtual std::unique_ptr writeObject( /// NOLINT const std::string & path, + WriteMode mode, std::optional attributes = {}, FinalizeCallback && finalize_callback = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index 4ff322b5b54..9f170799bb9 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -22,7 +22,6 @@ std::unique_ptr getSettings(const Poco::Util::AbstractC return std::make_unique( rw_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 16), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index de1194d6daf..fda1a1f51b0 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -80,13 +80,15 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); ObjectStoragePtr s3_storage = std::make_unique( std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), uri.version_id, uri.bucket); + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + std::shared_ptr s3disk = std::make_shared( name, uri.key, @@ -94,7 +96,8 @@ void registerDiskS3(DiskFactory & factory) metadata_disk, std::move(s3_storage), DiskType::S3, - send_metadata); + send_metadata, + copy_thread_pool_size); /// This code is used only to check access to the corresponding disk. 
if (!config.getBool(config_prefix + ".skip_access_check", false)) @@ -122,7 +125,6 @@ void registerDiskS3(DiskFactory & factory) disk_result = wrapWithCache(disk_result, "s3-cache", cache_path, metadata_path); } - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "DONE DISK"); return std::make_shared(disk_result); }; factory.registerDiskType("s3", creator); diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 3f26937a29b..3feea30ace3 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -133,6 +133,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, + WriteMode /* mode */, // S3 doesn't support append, only rewrite std::optional attributes, FinalizeCallback && finalize_callback, size_t buf_size, diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index fcc99ae1d91..12a8930c596 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -1,10 +1,10 @@ #pragma once -#include #include #if USE_AWS_S3 +#include #include #include #include @@ -22,12 +22,10 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings( const S3Settings::ReadWriteSettings & s3_settings_, uint64_t min_bytes_for_seek_, - uint64_t thread_pool_size_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_) : s3_settings(s3_settings_) , min_bytes_for_seek(min_bytes_for_seek_) - , thread_pool_size(thread_pool_size_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} @@ -35,7 +33,6 @@ struct S3ObjectStorageSettings S3Settings::ReadWriteSettings s3_settings; uint64_t min_bytes_for_seek; - uint64_t thread_pool_size; int32_t list_object_keys_size; int32_t objects_chunk_size_to_delete; }; @@ -75,6 +72,7 @@ public: /// Open the file for write and return WriteBufferFromFileBase object. std::unique_ptr writeObject( /// NOLINT const std::string & path, + WriteMode mode, std::optional attributes = {}, FinalizeCallback && finalize_callback = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Disks/WriteMode.h b/src/Disks/WriteMode.h new file mode 100644 index 00000000000..4a73e92ccab --- /dev/null +++ b/src/Disks/WriteMode.h @@ -0,0 +1,15 @@ +#pragma once + +namespace DB +{ + +/** + * Mode of opening a file for write. 
+ */ +enum class WriteMode +{ + Rewrite, + Append +}; + +} From 1c90f326c24dc866651c11a0c8ad6606235af220 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:25:05 +0200 Subject: [PATCH 030/150] Remove DiskHDFS --- src/Disks/DiskObjectStorage.cpp | 6 +- src/Disks/HDFS/DiskHDFS.cpp | 143 ---------------------------- src/Disks/HDFS/DiskHDFS.h | 84 ---------------- src/Disks/HDFS/registerDiskHDFS.cpp | 55 +++++++++++ src/Disks/HDFSObjectStorage.cpp | 17 +++- src/Disks/HDFSObjectStorage.h | 3 +- src/Disks/IObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.cpp | 5 +- 8 files changed, 77 insertions(+), 238 deletions(-) delete mode 100644 src/Disks/HDFS/DiskHDFS.cpp delete mode 100644 src/Disks/HDFS/DiskHDFS.h create mode 100644 src/Disks/HDFS/registerDiskHDFS.cpp diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index b0679051eec..bfec350caba 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -777,7 +777,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, object_attributes, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, create_metadata_callback, buf_size, settings); } @@ -848,7 +848,7 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; - auto buf = disk->object_storage->writeObject(path, metadata); + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); buf->write('0'); buf->finalize(); } @@ -892,7 +892,7 @@ void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) con { auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; - auto buf = disk->object_storage->writeObject(path); + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); writeIntText(version, *buf); buf->finalize(); diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp deleted file mode 100644 index b8e482f623c..00000000000 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include - -#if USE_HDFS - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; -} - - -DiskHDFS::DiskHDFS( - const String & disk_name_, - const String & hdfs_root_path_, - SettingsPtr settings_, - DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_) - : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, nullptr, "DiskHDFS", settings_->thread_pool_size) - , config(config_) - , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) - , hdfs_fs(createHDFSFS(hdfs_builder.get())) - , settings(std::move(settings_)) -{ -} - - -std::unique_ptr DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional, std::optional) const -{ - auto metadata = readMetadata(path); - - LOG_TEST(log, - "Read from file by path: {}. 
Existing HDFS objects: {}", - backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - - auto hdfs_impl = std::make_unique(config, remote_fs_root_path, remote_fs_root_path, metadata.remote_fs_objects, read_settings); - auto buf = std::make_unique(std::move(hdfs_impl)); - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); -} - - -std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) -{ - /// Path to store new HDFS object. - std::string file_name = getRandomName(); - std::string hdfs_path = fs::path(remote_fs_root_path) / file_name; - - LOG_TRACE(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", - backQuote(metadata_disk->getPath() + path), hdfs_path); - - /// Single O_WRONLY in libhdfs adds O_TRUNC - auto hdfs_buffer = std::make_unique(hdfs_path, - config, settings->replication, buf_size, - mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); - auto create_metadata_callback = [this, path, mode, file_name] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [file_name, count] (Metadata & metadata) { metadata.addObject(file_name, count); return true; }); - }; - - return std::make_unique(std::move(hdfs_buffer), std::move(create_metadata_callback), hdfs_path); -} - -void DiskHDFS::removeFromRemoteFS(const std::vector & paths) -{ - for (const auto & hdfs_path : paths) - { - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } -} - -bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const -{ - if (!boost::algorithm::starts_with(hdfs_uri, remote_fs_root_path)) - return false; - const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); - const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); -} - -namespace -{ -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings) -{ - return std::make_unique( - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - settings.hdfs_replication); -} -} - -void registerDiskHDFS(DiskFactory & factory) -{ - auto creator = [](const String & name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context_, - const DisksMap & /*map*/) -> DiskPtr - { - String uri{config.getString(config_prefix + ".endpoint")}; - checkHDFSURL(uri); - - if (uri.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); - - auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; - - return std::make_shared( - name, uri, - getSettings(config, config_prefix, context_->getSettingsRef()), - metadata_disk, config); - }; - - factory.registerDiskType("hdfs", creator); -} - -} -#endif diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h deleted file mode 100644 index 5c6e011dc96..00000000000 --- a/src/Disks/HDFS/DiskHDFS.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include - -#if 
USE_HDFS - -#include -#include -#include -#include - - -namespace DB -{ - -struct DiskHDFSSettings -{ - size_t min_bytes_for_seek; - int thread_pool_size; - int objects_chunk_size_to_delete; - int replication; - - DiskHDFSSettings( - int min_bytes_for_seek_, - int thread_pool_size_, - int objects_chunk_size_to_delete_, - int replication_) - : min_bytes_for_seek(min_bytes_for_seek_) - , thread_pool_size(thread_pool_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) - , replication(replication_) {} -}; - - -/** - * Storage for persisting data in HDFS and metadata on the local disk. - * Files are represented by file in local filesystem (clickhouse_root/disks/disk_name/path/to/file) - * that contains HDFS object key with actual data. - */ -class DiskHDFS final : public IDiskRemote -{ -public: - using SettingsPtr = std::unique_ptr; - - DiskHDFS( - const String & disk_name_, - const String & hdfs_root_path_, - SettingsPtr settings_, - DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_); - - DiskType getType() const override { return DiskType::HDFS; } - bool isRemote() const override { return true; } - - bool supportZeroCopyReplication() const override { return true; } - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; - - void removeFromRemoteFS(const std::vector & paths) override; - - /// Check file exists and ClickHouse has an access to it - /// Overrode in remote disk - /// Required for remote disk to ensure that replica has access to data written by other node - bool checkUniqueId(const String & hdfs_uri) const override; - -private: - String getRandomName() { return toString(UUIDHelpers::generateV4()); } - - const Poco::Util::AbstractConfiguration & config; - - HDFSBuilderWrapper hdfs_builder; - HDFSFSPtr hdfs_fs; - - SettingsPtr settings; -}; - -} -#endif diff --git a/src/Disks/HDFS/registerDiskHDFS.cpp b/src/Disks/HDFS/registerDiskHDFS.cpp new file mode 100644 index 00000000000..f67f6fbb440 --- /dev/null +++ b/src/Disks/HDFS/registerDiskHDFS.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void registerDiskHDFS(DiskFactory & factory) +{ + auto creator = [](const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context_, + const DisksMap & /*map*/) -> DiskPtr + { + String uri{config.getString(config_prefix + ".endpoint")}; + checkHDFSURL(uri); + + if (uri.back() != '/') + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); + + std::unique_ptr settings = std::make_unique( + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + context_->getSettingsRef().hdfs_replication + ); + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context_); + + ObjectStoragePtr hdfs_storage = std::make_unique(std::move(cache), uri, std::move(settings), config); + + auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + + return std::make_shared( + name, + uri, + "DiskHDFS", + metadata_disk, 
+ std::move(hdfs_storage), + DiskType::HDFS, + /* send_metadata = */ false, + copy_thread_pool_size); + }; + + factory.registerDiskType("hdfs", creator); +} + +} diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index 646ec6c8bd4..ad3ecf47bbb 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -1,5 +1,7 @@ #include + #include +#include #include #include #include @@ -42,9 +44,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT std::optional, std::optional) const { - auto buf = std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); - - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); + return std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT @@ -153,12 +153,21 @@ void HDFSObjectStorage::copyObject( /// NOLINT throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); auto in = readObject(object_from); - auto out = writeObject(object_to); + auto out = writeObject(object_to, WriteMode::Rewrite); copyData(*in, *out); out->finalize(); } +void HDFSObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +{ +} + +std::unique_ptr HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +{ + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning"); +} + } #endif diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/HDFSObjectStorage.h index 397741d2c4a..3c1bac02ee3 100644 --- a/src/Disks/HDFSObjectStorage.h +++ b/src/Disks/HDFSObjectStorage.h @@ -99,11 +99,10 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; - String getObjectsNamespace() const override { return bucket; } + String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; - private: const Poco::Util::AbstractConfiguration & config; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index 538cc702791..1997022d05c 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -41,7 +41,7 @@ void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object copyObject(object_from, object_to, object_to_attributes); auto in = readObject(object_from); - auto out = object_storage_to.writeObject(object_to); + auto out = object_storage_to.writeObject(object_to, WriteMode::Rewrite); copyData(*in, *out); out->finalize(); } diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 3feea30ace3..fe7c73b20cb 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -133,12 +133,15 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, - WriteMode /* mode */, // S3 doesn't support append, only rewrite + WriteMode mode, // S3 doesn't support append, only rewrite std::optional attributes, FinalizeCallback && finalize_callback, size_t buf_size, const WriteSettings & write_settings) { + if (mode != WriteMode::Rewrite) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 
doesn't support append to files"); + bool cache_on_write = cache && fs::path(path).extension() != ".tmp" && write_settings.enable_filesystem_cache_on_write_operations From f9cd8208540a9d6783c732b0d0dcdea1d2910211 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:39:46 +0200 Subject: [PATCH 031/150] Fix style --- src/Common/ErrorCodes.cpp | 1 + src/Disks/HDFSObjectStorage.cpp | 10 +++++----- src/Disks/S3ObjectStorage.cpp | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index eb84e24b713..aabc3c8e8dc 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -624,6 +624,7 @@ M(653, CANNOT_PARSE_BACKUP_SETTINGS) \ M(654, WRONG_BACKUP_SETTINGS) \ M(655, FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE) \ + M(656, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index ad3ecf47bbb..cbd89bcca88 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -19,7 +19,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; + extern const int HDFS_ERROR; } void HDFSObjectStorage::shutdown() @@ -86,7 +86,7 @@ void HDFSObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & c int32_t num_entries; auto * files_list = hdfsListDirectory(hdfs_fs.get(), path.substr(begin_of_path).c_str(), &num_entries); if (num_entries == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + path); for (int32_t i = 0; i < num_entries; ++i) children.emplace_back(files_list[i].mName, files_list[i].mSize); @@ -100,7 +100,7 @@ void HDFSObjectStorage::removeObject(const std::string & path) /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + path); } @@ -113,7 +113,7 @@ void HDFSObjectStorage::removeObjects(const std::vector & paths) /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); } } @@ -135,7 +135,7 @@ void HDFSObjectStorage::removeObjectsIfExist(const std::vector & pa /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); } } diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index fe7c73b20cb..f09bbc28474 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -34,6 +34,7 @@ namespace DB namespace ErrorCodes { extern const int S3_ERROR; + extern const int BAD_ARGUMENTS; } namespace From 5872781ac60678b3792fbb711fe47ed7a7528239 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:50:30 +0200 Subject: [PATCH 032/150] Merge with master --- src/Common/ErrorCodes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index de8b5ec19f8..203c4636b72 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -627,7 +627,7 @@ M(656, MEILISEARCH_EXCEPTION) \ M(657, UNSUPPORTED_MEILISEARCH_TYPE) \ M(658, MEILISEARCH_MISSING_SOME_COLUMNS) \ - M(656, HDFS_ERROR) \ + M(659, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ From 19462bdf9e96fd1271a96e827f683c656d907a56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 14:31:28 +0200 Subject: [PATCH 033/150] Drop filesystem cache before test --- tests/queries/1_stateful/00170_s3_cache.reference | 1 + tests/queries/1_stateful/00170_s3_cache.sql | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/1_stateful/00170_s3_cache.reference b/tests/queries/1_stateful/00170_s3_cache.reference index 96b02595c2e..04d610bc8d2 100644 --- a/tests/queries/1_stateful/00170_s3_cache.reference +++ b/tests/queries/1_stateful/00170_s3_cache.reference @@ -2,6 +2,7 @@ SET enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; +SYSTEM DROP FILESYSTEM CACHE; SELECT count() FROM test.hits_s3; 8873898 SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql index febabb1c9e4..6549bcf5479 100644 --- a/tests/queries/1_stateful/00170_s3_cache.sql +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -2,8 +2,11 @@ -- { echo } +SYSTEM DROP FILESYSTEM CACHE; + SET enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; + SELECT count() FROM test.hits_s3; SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; From 41f7424ba68d835eb66e1a5224384cff799cce4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 17:13:17 +0200 Subject: [PATCH 034/150] Improve deletion logic --- src/Disks/S3ObjectStorage.cpp | 97 +++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index f09bbc28474..9c33553c6b0 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -60,6 +60,19 @@ void throwIfError(const Aws::Utils::Outcome & response) } } +template +void logIfError(const Aws::Utils::Outcome & response, std::function && msg) +{ + try + { + throwIfError(response); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__, msg()); + } +} + } Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const @@ -212,26 +225,34 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) return; auto client_ptr = client.get(); - std::vector keys; - keys.reserve(paths.size()); + auto settings_ptr = s3_settings.get(); - for (const auto & path : paths) + size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; + size_t current_position = 0; + + while (current_position < paths.size()) { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(path); - keys.push_back(obj); + std::vector current_chunk; + String keys; + for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(paths[current_position]); + current_chunk.push_back(obj); + + if (!keys.empty()) + keys += ", "; + keys += paths[current_position]; + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(current_chunk); + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); } - - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(keys); - - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - - throwIfError(outcome); - } void S3ObjectStorage::removeObjectIfExists(const std::string & path) @@ -255,25 +276,35 @@ void S3ObjectStorage::removeObjectsIfExist(const std::vector & path return; auto client_ptr = client.get(); + auto settings_ptr = s3_settings.get(); - std::vector keys; - keys.reserve(paths.size()); - for (const auto & path : paths) + + size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; + size_t current_position = 0; + + while (current_position < paths.size()) { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(path); - keys.push_back(obj); + std::vector current_chunk; + String keys; + for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(paths[current_position]); + current_chunk.push_back(obj); + + if (!keys.empty()) + keys += ", "; + keys += paths[current_position]; + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(current_chunk); + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); } - - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(keys); - - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - - throwIfError(outcome); } ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const From 6356112a76c998e75ebfb6d1dbd9eeff593e200a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 18 May 2022 15:23:31 +0000 Subject: [PATCH 035/150] Refactor GROUPING function --- src/Core/ColumnNumbers.h | 3 + src/Functions/grouping.cpp | 159 ------------------ src/Functions/grouping.h | 151 +++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 - src/Interpreters/ActionsVisitor.cpp | 110 ++++-------- 
.../02293_grouping_function.reference | 84 ++++----- .../0_stateless/02293_grouping_function.sql | 27 ++- ...02293_grouping_function_group_by.reference | 80 ++++----- .../02293_grouping_function_group_by.sql | 13 +- 9 files changed, 297 insertions(+), 332 deletions(-) delete mode 100644 src/Functions/grouping.cpp create mode 100644 src/Functions/grouping.h diff --git a/src/Core/ColumnNumbers.h b/src/Core/ColumnNumbers.h index 29b4c49dc83..2c1f02f720d 100644 --- a/src/Core/ColumnNumbers.h +++ b/src/Core/ColumnNumbers.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -8,6 +9,8 @@ namespace DB { using ColumnNumbers = std::vector; +using ColumnNumbersSet = std::unordered_set; using ColumnNumbersList = std::vector; +using ColumnNumbersSetList = std::vector; } diff --git a/src/Functions/grouping.cpp b/src/Functions/grouping.cpp deleted file mode 100644 index c6c6061307d..00000000000 --- a/src/Functions/grouping.cpp +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class FunctionGrouping : public IFunction -{ -public: - static constexpr auto name = "grouping"; - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool useDefaultImplementationForNulls() const override { return false; } - - bool isSuitableForConstantFolding() const override { return false; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - String getName() const override - { - return name; - } - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - //TODO: add assert for argument types - return std::make_shared(); - } - - ColumnPtr executeOrdinaryGroupBy(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const - { - auto grouping_set_map_column = checkAndGetColumnConst(arguments[0].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[1].column.get()); - - auto aggregation_keys_number = (*grouping_set_map_column)[0].get(); - - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - auto indexes = (*argument_keys_column)[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (index.get() < aggregation_keys_number ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeRollup( - const ColumnUInt64 * grouping_set_column, - const ColumnConst & argument_keys_column, - UInt64 keys, - size_t input_rows_count) const - { - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - - auto indexes = argument_keys_column[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (index.get() < keys - set_index ? 
1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeCube( - const ColumnUInt64 * grouping_set_column, - const ColumnConst & argument_keys_column, - UInt64 keys, - size_t input_rows_count) const - { - static constexpr auto ONE = static_cast(1); - auto result = std::make_shared()->createColumn(); - auto mask_base = (ONE << keys) - 1; - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - auto mask = mask_base - set_index; - auto indexes = argument_keys_column[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (mask & (ONE << (keys - index.get() - 1)) ? 1 : 0); - - result->insert(Field(value)); - } - return result; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & , size_t input_rows_count) const override - { - if (arguments.size() == 2) - return executeOrdinaryGroupBy(arguments, input_rows_count); - - auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto grouping_set_map_column = checkAndGetColumnConst(arguments[1].column.get()); - auto argument_keys_column = checkAndGetColumnConst(arguments[2].column.get()); - - auto masks = (*grouping_set_map_column)[0].get(); - auto grouping_set_map_elem_type = applyVisitor(FieldToDataType(), masks[0]); - if (!isString(grouping_set_map_elem_type)) - { - bool is_rollup = masks[0].get() == 0; - auto keys = masks[1].get(); - if (is_rollup) - return executeRollup(grouping_set_column, *argument_keys_column, keys, input_rows_count); - else - return executeCube(grouping_set_column, *argument_keys_column, keys, input_rows_count); - } - - auto result = std::make_shared()->createColumn(); - for (size_t i = 0; i < input_rows_count; ++i) - { - UInt64 set_index = grouping_set_column->get64(i); - auto mask = masks[set_index].get(); - - auto indexes = (*argument_keys_column)[i].get(); - UInt64 value = 0; - for (auto index : indexes) - value = (value << 1) + (mask[index.get()] == '1' ? 
1 : 0); - - result->insert(Field(value)); - } - return result; - } - -}; - -void registerFunctionGrouping(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h new file mode 100644 index 00000000000..a881616812b --- /dev/null +++ b/src/Functions/grouping.h @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Core/ColumnNumbers.h" +#include "DataTypes/Serializations/ISerialization.h" +#include "base/types.h" + +namespace DB +{ + +class FunctionGroupingBase : public IFunction +{ +protected: + static constexpr UInt64 ONE = 1; + + const ColumnNumbers arguments_indexes; + +public: + FunctionGroupingBase(ColumnNumbers arguments_indexes_) + : arguments_indexes(std::move(arguments_indexes_)) + {} + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool isSuitableForConstantFolding() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + template + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const + { + auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + + auto result = std::make_shared()->createColumn(); + for (size_t i = 0; i < input_rows_count; ++i) + { + UInt64 set_index = grouping_set_column->get64(i); + + UInt64 value = 0; + for (auto index : arguments_indexes) + value = (value << 1) + (checker(set_index, index) ? 
1 : 0); + + result->insert(Field(value)); + } + return result; + } +}; + +class FunctionGroupingOrdinary : public FunctionGroupingBase +{ +public: + explicit FunctionGroupingOrdinary(ColumnNumbers arguments_indexes_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + {} + + String getName() const override { return "groupingOrdinary"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + UInt64 value = (ONE << arguments_indexes.size()) - 1; + return ColumnUInt64::create(input_rows_count, value); + } +}; + +class FunctionGroupingForRollup : public FunctionGroupingBase +{ + const UInt64 aggregation_keys_number; + +public: + FunctionGroupingForRollup(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + , aggregation_keys_number(aggregation_keys_number_) + {} + + String getName() const override { return "groupingForRollup"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + return arg_index < aggregation_keys_number - set_index; + } + ); + } +}; + +class FunctionGroupingForCube : public FunctionGroupingBase +{ + const UInt64 aggregation_keys_number; + +public: + + FunctionGroupingForCube(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_) + : FunctionGroupingBase(arguments_indexes_) + , aggregation_keys_number(aggregation_keys_number_) + {} + + String getName() const override { return "groupingForCube"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + auto set_mask = (ONE << aggregation_keys_number) - 1 - set_index; + return set_mask & (ONE << (aggregation_keys_number - arg_index - 1)); + } + ); + } +}; + +class FunctionGroupingForGroupingSets : public FunctionGroupingBase +{ + ColumnNumbersSetList grouping_sets; +public: + FunctionGroupingForGroupingSets(ColumnNumbers arguments_indexes_, ColumnNumbersList const & grouping_sets_) + : FunctionGroupingBase(std::move(arguments_indexes_)) + { + for (auto const & set : grouping_sets_) + grouping_sets.emplace_back(set.begin(), set.end()); + } + + String getName() const override { return "groupingForGroupingSets"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + return FunctionGroupingBase::executeImpl(arguments, input_rows_count, + [this](UInt64 set_index, UInt64 arg_index) + { + return grouping_sets[set_index].contains(arg_index); + } + ); + } +}; + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 9fe1fa69b5e..9cd9c70da16 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -83,7 +83,6 @@ void registerFunctionZooKeeperSessionUptime(FunctionFactory &); void registerFunctionGetOSKernelVersion(FunctionFactory &); void registerFunctionGetTypeSerializationStreams(FunctionFactory &); void registerFunctionFlattenTuple(FunctionFactory &); -void registerFunctionGrouping(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ 
-173,7 +172,6 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetOSKernelVersion(factory); registerFunctionGetTypeSerializationStreams(factory); registerFunctionFlattenTuple(factory); - registerFunctionGrouping(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 70493e5fefc..4f44513a5ea 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,6 +6,7 @@ #include #include +#include #include #include @@ -839,89 +841,39 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (node.name == "grouping") { - auto arguments_column_name = data.getUniqueName("__grouping_args"); + ColumnNumbers arguments_indexes; + auto aggregation_keys_number = data.aggregation_keys.size(); + for (auto const & arg : node.arguments->children) { - if (!data.hasColumn("__grouping_set_map")) - { - ColumnWithTypeAndName column; - column.name = "__grouping_set_map"; - switch (data.group_by_kind) - { - case GroupByKind::GROUPING_SETS: - { - size_t map_size = data.aggregation_keys.size() + 1; - column.type = std::make_shared(std::make_shared(map_size)); - Array maps_per_set; - for (auto & grouping_set : data.grouping_set_keys) - { - std::string key_map(map_size, '0'); - for (auto index : grouping_set) - key_map[index] = '1'; - maps_per_set.push_back(key_map); - } - auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size)); - grouping_set_map_column->insert(maps_per_set); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - case GroupByKind::ROLLUP: - case GroupByKind::CUBE: - { - column.type = std::make_shared(std::make_shared()); - auto grouping_set_map_column = ColumnArray::create(ColumnUInt64::create()); - Array kind_and_keys_size; - kind_and_keys_size.push_back(data.group_by_kind == GroupByKind::ROLLUP ? 
0 : 1); - kind_and_keys_size.push_back(data.aggregation_keys.size()); - grouping_set_map_column->insert(kind_and_keys_size); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - case GroupByKind::ORDINARY: - { - column.type = std::make_shared(); - auto grouping_set_map_column = ColumnUInt64::create(1, data.aggregation_keys.size()); - column.column = ColumnConst::create(std::move(grouping_set_map_column), 1); - break; - } - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); - } - - data.addColumn(column); - } - ColumnWithTypeAndName column; - column.name = arguments_column_name; - column.type = std::make_shared(std::make_shared()); - Array arguments_to_keys_map; - for (auto const & arg : node.arguments->children) - { - size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); - arguments_to_keys_map.push_back(pos); - } - auto arguments_column = ColumnArray::create(ColumnUInt64::create()); - arguments_column->insert(Field{arguments_to_keys_map}); - - column.column = ColumnConst::create(ColumnPtr(std::move(arguments_column)), 1); - - data.addColumn(column); + size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + if (pos == aggregation_keys_number) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of GROUPING function {} is not a part of GROUP BY clause", arg->getColumnName()); + arguments_indexes.push_back(pos); } - if (data.group_by_kind != GroupByKind::ORDINARY) + switch (data.group_by_kind) { - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set", "__grouping_set_map", arguments_column_name }, - column_name - ); - } - else - { - data.addFunction( - FunctionFactory::instance().get("grouping", data.getContext()), - { "__grouping_set_map", arguments_column_name }, - column_name - ); + case GroupByKind::GROUPING_SETS: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.grouping_set_keys)), { "__grouping_set" }, column_name); + break; + } + case GroupByKind::ROLLUP: + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + break; + case GroupByKind::CUBE: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + break; + } + case GroupByKind::ORDINARY: + { + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes))), {}, column_name); + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); } return; } diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index f08e6d0ea99..dbae7a11f2e 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -1,27 +1,27 @@ -0 2 -0 2 -0 4 -1 4 -2 4 -3 4 -4 4 -5 4 -6 4 -7 4 -8 4 -9 4 0 1 0 1 -0 4 -1 4 -2 4 -3 4 -4 4 -5 4 -6 4 -7 4 -8 4 -9 4 +0 2 +1 2 +2 2 +3 2 +4 2 +5 2 +6 2 +7 2 +8 2 +9 2 +0 1 +0 2 +0 2 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 0 0 0 1 0 1 @@ -47,26 +47,26 @@ 8 9 0 10 0 -0 1 4 -1 1 4 -2 1 4 -3 1 4 -4 1 4 -5 1 4 -6 1 4 -7 1 4 -8 1 4 -9 1 4 -0 1 6 -1 1 6 -2 1 6 -3 1 6 -4 1 6 -5 1 6 -6 1 6 -7 1 6 -8 1 6 -9 1 6 +0 1 2 
+1 1 2 +2 1 2 +3 1 2 +4 1 2 +5 1 2 +6 1 2 +7 1 2 +8 1 2 +9 1 2 +0 1 3 +1 1 3 +2 1 3 +3 1 3 +4 1 3 +5 1 3 +6 1 3 +7 1 3 +8 1 3 +9 1 3 0 1 2 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 3555f9dabab..4bbf620a619 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -7,11 +7,11 @@ GROUP BY (number), (number % 2) ) -ORDER BY number, gr; +ORDER BY number, gr; -- { serverError BAD_ARGUMENTS } SELECT number, - grouping(number, number % 3, number % 2) AS gr + grouping(number, number % 2) AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -22,7 +22,18 @@ ORDER BY number, gr; SELECT number, - grouping(number, number % 2, number % 3) = 2 AS gr + grouping(number % 2, number) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2) = 1 AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -39,12 +50,12 @@ GROUP BY (number), (number % 2) ) -ORDER BY number, grouping(number, number % 2, number % 3) = 2; +ORDER BY number, grouping(number, number % 2) = 1; SELECT number, count(), - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM numbers(10) GROUP BY GROUPING SETS ( @@ -62,7 +73,7 @@ GROUP BY (number), (number % 2) ) -HAVING grouping(number, number % 2, number % 3) = 4 +HAVING grouping(number, number % 2) = 2 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; @@ -74,13 +85,13 @@ GROUP BY (number), (number % 2) ) -HAVING grouping(number, number % 2, number % 3) = 2 +HAVING grouping(number, number % 2) = 1 ORDER BY number SETTINGS enable_optimize_predicate_expression = 0; SELECT number, - GROUPING(number, number % 2, number % 3) = 2 as gr + GROUPING(number, number % 2) = 1 as gr FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY GROUPING SETS ( diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 0285611b9fa..9f73523728d 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -19,47 +19,47 @@ 8 1 1 9 1 1 0 0 -0 4 -0 6 -1 4 -1 6 -2 4 -2 6 -3 4 -3 6 -4 4 -4 6 -5 4 -5 6 -6 4 -6 6 -7 4 -7 6 -8 4 -8 6 -9 4 -9 6 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 0 0 -0 4 -0 6 -1 4 -1 6 -2 4 -2 6 -3 4 -3 6 -4 4 -4 6 -5 4 -5 6 -6 4 -6 6 -7 4 -7 6 -8 4 -8 6 -9 4 -9 6 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 0 0 0 1 0 1 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index 1b0fcdb9289..e9a0338c35a 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -2,6 +2,15 @@ SELECT number, grouping(number, number % 2, number % 3) = 6 FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; -- { serverError BAD_ARGUMENTS } + +SELECT + number, + grouping(number, number % 2) = 3 +FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY number, number % 2 @@ -19,7 +28,7 @@ ORDER BY number; SELECT number, - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM 
remote('127.0.0.{2,3}', numbers(10)) GROUP BY number, @@ -30,7 +39,7 @@ ORDER BY SELECT number, - grouping(number, number % 2, number % 3) AS gr + grouping(number, number % 2) AS gr FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY ROLLUP(number, number % 2) From d4abbf54b0f1ce77ee02174d34afa0e417571dcd Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 23:21:07 +0200 Subject: [PATCH 036/150] Update 00170_s3_cache.sql --- tests/queries/1_stateful/00170_s3_cache.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql index 6549bcf5479..b03b2a16bf0 100644 --- a/tests/queries/1_stateful/00170_s3_cache.sql +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -2,11 +2,9 @@ -- { echo } -SYSTEM DROP FILESYSTEM CACHE; - SET enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; - +SYSTEM DROP FILESYSTEM CACHE; SELECT count() FROM test.hits_s3; SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; From d4c66f4a4859fae200d740567572b11172b96a6d Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 19 May 2022 16:36:51 +0000 Subject: [PATCH 037/150] Code cleanup & fix GROUPING() with TOTALS --- src/Functions/grouping.h | 10 ++-- src/Interpreters/ActionsVisitor.cpp | 7 +++ src/Interpreters/InterpreterSelectQuery.cpp | 7 ++- src/Parsers/ASTSelectQuery.h | 34 +++++++------ src/Processors/QueryPlan/AggregatingStep.cpp | 2 +- src/Processors/QueryPlan/CubeStep.cpp | 23 ++++++++- src/Processors/QueryPlan/CubeStep.h | 1 + src/Processors/QueryPlan/RollupStep.cpp | 7 ++- src/Processors/QueryPlan/RollupStep.h | 1 + ...02293_grouping_function_group_by.reference | 48 +++++++++++++++++++ .../02293_grouping_function_group_by.sql | 38 +++++++++++++++ 11 files changed, 146 insertions(+), 32 deletions(-) diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index a881616812b..934be18345d 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -1,14 +1,14 @@ -#include +#pragma once + #include #include #include #include +#include +#include #include #include -#include -#include "Core/ColumnNumbers.h" -#include "DataTypes/Serializations/ISerialization.h" -#include "base/types.h" +#include namespace DB { diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4f44513a5ea..d22989219a4 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -65,6 +65,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DUPLICATE_COLUMN; extern const int LOGICAL_ERROR; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -841,6 +843,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (node.name == "grouping") { + size_t arguments_size = node.arguments->children.size(); + if (arguments_size == 0) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument"); + if (arguments_size > 64) + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); ColumnNumbers arguments_indexes; auto aggregation_keys_number = data.aggregation_keys.size(); for (auto const & arg : node.arguments->children) diff 
--git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..3b438ef9863 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1095,6 +1095,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

desc->type == ProjectionDescription::Type::Aggregate) { query_info.projection->aggregate_overflow_row = aggregate_overflow_row; @@ -1387,11 +1390,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs index; - index.reserve(output_header.columns() + 2); + index.reserve(output_header.columns() + 1); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); const auto * grouping_node = &dag->addColumn( diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 43a6491157a..91c85a08412 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include namespace DB { @@ -24,6 +26,7 @@ static ITransformingStep::Traits getTraits() CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) + , keys_size(params_->params.keys_size) , params(std::move(params_)) { /// Aggregation keys are distinct @@ -31,14 +34,30 @@ CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsP output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) +{ + auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + + auto grouping_col = ColumnUInt64::create(1, grouping_set_number); + const auto * grouping_node = &dag->addColumn( + {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); + + grouping_node = &dag->materializeNode(*grouping_node); + auto & index = dag->getIndex(); + index.insert(index.begin(), grouping_node); + + auto expression = std::make_shared(dag, settings.getActionsSettings()); + return std::make_shared(header, expression); +} + +void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { pipeline.resize(1); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; + return addGroupingSetForTotals(header, settings, (UInt64(1) << keys_size) - 1); return std::make_shared(header, std::move(params)); }); diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 1079bed5398..d3e26f9379f 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -21,6 +21,7 @@ public: const Aggregator::Params & getParams() const; private: + size_t keys_size; AggregatingTransformParamsPtr params; }; diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 2961ef5ddbd..3b061f9c246 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -25,20 +25,23 @@ static ITransformingStep::Traits getTraits() RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) , params(std::move(params_)) + , keys_size(params->params.keys_size) { /// Aggregation keys are distinct for (auto key : params->params.keys) output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); } -void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const 
BuildQueryPipelineSettings &) +ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number); + +void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { pipeline.resize(1); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; + return addGroupingSetForTotals(header, settings, keys_size); return std::make_shared(header, std::move(params)); }); diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 7cd71fecdc1..3dce6f74d9f 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -20,6 +20,7 @@ public: private: AggregatingTransformParamsPtr params; + size_t keys_size; }; } diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 9f73523728d..021083db6eb 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -126,3 +126,51 @@ 8 6 9 5 9 6 +0 0 +0 1 +0 1 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 + +0 0 +0 0 +0 2 +0 3 +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 +4 2 +4 3 +5 2 +5 3 +6 2 +6 3 +7 2 +7 3 +8 2 +8 3 +9 2 +9 3 + +0 0 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index e9a0338c35a..b30080b88af 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -75,3 +75,41 @@ GROUP BY HAVING grouping(number) != 0 ORDER BY number, gr; + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +ORDER BY + number, gr; + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } + +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +ORDER BY + number, gr; From c87c3fcfd93225589d95bc14740c1f4aba490297 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 19:25:45 +0200 Subject: [PATCH 038/150] Move Azure blob storage --- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 30 ++- .../AzureBlobStorage/AzureBlobStorageAuth.h | 5 +- .../AzureBlobStorage/DiskAzureBlobStorage.cpp | 168 -------------- .../AzureBlobStorage/DiskAzureBlobStorage.h | 86 ------- .../registerDiskAzureBlobStorage.cpp | 53 +++-- src/Disks/AzureObjectStorage.cpp | 213 ++++++++++++++++++ src/Disks/AzureObjectStorage.h | 113 ++++++++++ src/Disks/DiskObjectStorage.cpp | 2 +- src/Disks/DiskObjectStorage.h | 2 +- src/Disks/DiskRestartProxy.cpp | 4 +- src/Disks/HDFSObjectStorage.cpp | 7 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 4 +- 
src/Disks/S3/registerDiskS3.cpp | 1 - src/IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- src/IO/ReadBufferFromAzureBlobStorage.h | 6 +- src/IO/WriteBufferFromAzureBlobStorage.cpp | 25 +- src/IO/WriteBufferFromAzureBlobStorage.h | 10 +- 17 files changed, 419 insertions(+), 312 deletions(-) delete mode 100644 src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp delete mode 100644 src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h create mode 100644 src/Disks/AzureObjectStorage.cpp create mode 100644 src/Disks/AzureObjectStorage.h diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp index 94553ba04e9..c078f584a09 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -66,27 +66,27 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr template -std::shared_ptr getClientWithConnectionString(const String & connection_str, const String & container_name) = delete; +std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name) = delete; template<> -std::shared_ptr getClientWithConnectionString( +std::unique_ptr getClientWithConnectionString( const String & connection_str, const String & /*container_name*/) { - return std::make_shared(BlobServiceClient::CreateFromConnectionString(connection_str)); + return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str)); } template<> -std::shared_ptr getClientWithConnectionString( +std::unique_ptr getClientWithConnectionString( const String & connection_str, const String & container_name) { - return std::make_shared(BlobContainerClient::CreateFromConnectionString(connection_str, container_name)); + return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name)); } template -std::shared_ptr getAzureBlobStorageClientWithAuth( +std::unique_ptr getAzureBlobStorageClientWithAuth( const String & url, const String & container_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { if (config.has(config_prefix + ".connection_string")) @@ -101,15 +101,15 @@ std::shared_ptr getAzureBlobStorageClientWithAuth( config.getString(config_prefix + ".account_name"), config.getString(config_prefix + ".account_key") ); - return std::make_shared(url, storage_shared_key_credential); + return std::make_unique(url, storage_shared_key_credential); } auto managed_identity_credential = std::make_shared(); - return std::make_shared(url, managed_identity_credential); + return std::make_unique(url, managed_identity_credential); } -std::shared_ptr getAzureBlobContainerClient( +std::unique_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); @@ -136,10 +136,20 @@ std::shared_ptr getAzureBlobContainerClient( } } - return std::make_shared( + return std::make_unique( blob_service_client->CreateBlobContainer(container_name).Value); } +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) +{ + return std::make_unique( + config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".max_single_read_retries", 3), + config.getInt(config_prefix + 
".max_single_download_retries", 3) + ); +} + } #endif diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 048daa7c9dc..32d3ca9945a 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -6,13 +6,16 @@ #include #include +#include namespace DB { -std::shared_ptr getAzureBlobContainerClient( +std::unique_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/); + } #endif diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp deleted file mode 100644 index 556c28bd3f4..00000000000 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int AZURE_BLOB_STORAGE_ERROR; -} - - -DiskAzureBlobStorageSettings::DiskAzureBlobStorageSettings( - UInt64 max_single_part_upload_size_, - UInt64 min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int thread_pool_size_) : - max_single_part_upload_size(max_single_part_upload_size_), - min_bytes_for_seek(min_bytes_for_seek_), - max_single_read_retries(max_single_read_retries_), - max_single_download_retries(max_single_download_retries_), - thread_pool_size(thread_pool_size_) {} - - -DiskAzureBlobStorage::DiskAzureBlobStorage( - const String & name_, - DiskPtr metadata_disk_, - std::shared_ptr blob_container_client_, - SettingsPtr settings_, - GetDiskSettings settings_getter_) : - IDiskRemote(name_, "", metadata_disk_, nullptr, "DiskAzureBlobStorage", settings_->thread_pool_size), - blob_container_client(blob_container_client_), - current_settings(std::move(settings_)), - settings_getter(settings_getter_) {} - - -std::unique_ptr DiskAzureBlobStorage::readFile( - const String & path, - const ReadSettings & read_settings, - std::optional, - std::optional) const -{ - auto settings = current_settings.get(); - auto metadata = readMetadata(path); - - LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); - - auto reader_impl = std::make_unique( - blob_container_client, metadata.remote_fs_root_path, metadata.remote_fs_objects, - settings->max_single_read_retries, settings->max_single_download_retries, read_settings); - - if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, read_settings, std::move(reader_impl)); - } - else - { - auto buf = std::make_unique(std::move(reader_impl)); - return std::make_unique(std::move(buf), current_settings.get()->min_bytes_for_seek); - } -} - - -std::unique_ptr DiskAzureBlobStorage::writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings &) -{ - auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name - - LOG_TRACE(log, "{} to file by path: {}. AzureBlob Storage path: {}", - mode == WriteMode::Rewrite ? 
"Write" : "Append", backQuote(metadata_disk->getPath() + path), blob_path); - - auto buffer = std::make_unique( - blob_container_client, - blob_path, - current_settings.get()->max_single_part_upload_size, - buf_size); - - auto create_metadata_callback = [this, path, mode, blob_path] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_path, count] (Metadata & metadata) { metadata.addObject(blob_path, count); return true; }); - }; - - return std::make_unique(std::move(buffer), std::move(create_metadata_callback), blob_path); -} - - -DiskType DiskAzureBlobStorage::getType() const -{ - return DiskType::AzureBlobStorage; -} - - -bool DiskAzureBlobStorage::isRemote() const -{ - return true; -} - - -bool DiskAzureBlobStorage::supportZeroCopyReplication() const -{ - return true; -} - - -bool DiskAzureBlobStorage::checkUniqueId(const String & id) const -{ - Azure::Storage::Blobs::ListBlobsOptions blobs_list_options; - blobs_list_options.Prefix = id; - blobs_list_options.PageSizeHint = 1; - - auto blobs_list_response = blob_container_client->ListBlobs(blobs_list_options); - auto blobs_list = blobs_list_response.Blobs; - - for (const auto & blob : blobs_list) - { - if (id == blob.Name) - return true; - } - - return false; -} - - -void DiskAzureBlobStorage::removeFromRemoteFS(const std::vector & paths) -{ - for (const auto & path : paths) - { - try - { - auto delete_info = blob_container_client->DeleteBlob(path); - if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); - } - catch (const Azure::Storage::StorageException & e) - { - LOG_INFO(log, "Caught an error while deleting file {} : {}", path, e.Message); - throw; - } - } -} - -void DiskAzureBlobStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) -{ - auto new_settings = settings_getter(config, "storage_configuration.disks." 
+ name, context); - - current_settings.set(std::move(new_settings)); - - if (AsyncExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(current_settings.get()->thread_pool_size); -} - -} - -#endif diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h deleted file mode 100644 index ff99e246d31..00000000000 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -struct DiskAzureBlobStorageSettings final -{ - DiskAzureBlobStorageSettings( - UInt64 max_single_part_upload_size_, - UInt64 min_bytes_for_seek_, - int max_single_read_retries, - int max_single_download_retries, - int thread_pool_size_); - - size_t max_single_part_upload_size; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - UInt64 min_bytes_for_seek; - size_t max_single_read_retries; - size_t max_single_download_retries; - size_t thread_pool_size; -}; - - -class DiskAzureBlobStorage final : public IDiskRemote -{ -public: - - using SettingsPtr = std::unique_ptr; - using GetDiskSettings = std::function; - - DiskAzureBlobStorage( - const String & name_, - DiskPtr metadata_disk_, - std::shared_ptr blob_container_client_, - SettingsPtr settings_, - GetDiskSettings settings_getter_); - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - DiskType getType() const override; - - bool isRemote() const override; - - bool supportZeroCopyReplication() const override; - - bool checkUniqueId(const String & id) const override; - - void removeFromRemoteFS(const std::vector & paths) override; - - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; - -private: - - /// client used to access the files in the Blob Storage cloud - std::shared_ptr blob_container_client; - - MultiVersion current_settings; - /// Gets disk settings from context. 
- GetDiskSettings settings_getter; -}; - -} - -#endif diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 56df793783e..e111406a587 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -7,9 +7,9 @@ #include #include #include -#include #include - +#include +#include namespace DB { @@ -26,14 +26,12 @@ constexpr char test_file[] = "test.txt"; constexpr char test_str[] = "test"; constexpr size_t test_str_size = 4; - void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile(test_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); file->write(test_str, test_str_size); } - void checkReadAccess(IDisk & disk) { auto file = disk.readFile(test_file); @@ -43,7 +41,6 @@ void checkReadAccess(IDisk & disk) throw Exception("No read access to disk", ErrorCodes::PATH_ACCESS_DENIED); } - void checkReadWithOffset(IDisk & disk) { auto file = disk.readFile(test_file); @@ -56,24 +53,11 @@ void checkReadWithOffset(IDisk & disk) throw Exception("Failed to read file with offset", ErrorCodes::PATH_ACCESS_DENIED); } - void checkRemoveAccess(IDisk & disk) { disk.removeFile(test_file); } - -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) -{ - return std::make_unique( - config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".max_single_read_retries", 3), - config.getInt(config_prefix + ".max_single_download_retries", 3), - config.getInt(config_prefix + ".thread_pool_size", 16) - ); -} - } void registerDiskAzureBlobStorage(DiskFactory & factory) @@ -87,12 +71,27 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - std::shared_ptr azure_blob_storage_disk = std::make_shared( + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); + + ObjectStoragePtr azure_object_storage = std::make_unique( + std::move(cache), name, - metadata_disk, getAzureBlobContainerClient(config, config_prefix), - getSettings(config, config_prefix, context), - getSettings + getAzureBlobStorageSettings(config, config_prefix, context)); + + + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + + std::shared_ptr azure_blob_storage_disk = std::make_shared( + name, + /* no namespaces */"", + "DiskAzureBlobStorage", + metadata_disk, + std::move(azure_object_storage), + DiskType::AzureBlobStorage, + send_metadata, + copy_thread_pool_size ); if (!config.getBool(config_prefix + ".skip_access_check", false)) @@ -103,9 +102,17 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) checkRemoveAccess(*azure_blob_storage_disk); } +#ifdef NDEBUG + bool use_cache = true; +#else + /// Current cache implementation lead to allocations in destructor of + /// read buffer. 
+ bool use_cache = false; +#endif + azure_blob_storage_disk->startup(context); - if (config.getBool(config_prefix + ".cache_enabled", true)) + if (config.getBool(config_prefix + ".cache_enabled", use_cache)) { String cache_path = config.getString(config_prefix + ".cache_path", context->getPath() + "disks/" + name + "/cache/"); azure_blob_storage_disk = wrapWithCache(azure_blob_storage_disk, "azure-blob-storage-cache", cache_path, metadata_path); diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/AzureObjectStorage.cpp new file mode 100644 index 00000000000..68f7f63638a --- /dev/null +++ b/src/Disks/AzureObjectStorage.cpp @@ -0,0 +1,213 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int AZURE_BLOB_STORAGE_ERROR; + extern const int UNSUPPORTED_METHOD; +} + + +AzureObjectStorage::AzureObjectStorage( + FileCachePtr && cache_, + const String & name_, + AzureClientPtr && client_, + SettingsPtr && settings_) + : IObjectStorage(std::move(cache_)) + , name(name_) + , client(std::move(client_)) + , settings(std::move(settings_)) +{ +} + +bool AzureObjectStorage::exists(const std::string & uri) const +{ + auto client_ptr = client.get(); + + /// What a shame, no Exists method... + Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = uri; + options.PageSizeHint = 1; + + auto blobs_list_response = client_ptr->ListBlobs(options); + auto blobs_list = blobs_list_response.Blobs; + + for (const auto & blob : blobs_list) + { + if (uri == blob.Name) + return true; + } + + return false; +} + +std::unique_ptr AzureObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = settings.get(); + + return std::make_unique( + client.get(), path, settings_ptr->max_single_read_retries, + settings_ptr->max_single_download_retries, read_settings.remote_fs_buffer_size); +} + +std::unique_ptr AzureObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = settings.get(); + auto reader_impl = std::make_unique( + client.get(), common_path_prefix, blobs_to_read, + settings_ptr->max_single_read_retries, settings_ptr->max_single_download_retries, read_settings); + + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, read_settings, std::move(reader_impl)); + } + else + { + auto buf = std::make_unique(std::move(reader_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } +} + +/// Open the file for write and return WriteBufferFromFileBase object. 
+std::unique_ptr AzureObjectStorage::writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings &) +{ + if (mode != WriteMode::Rewrite) + throw Exception("Azure storage doesn't support append", ErrorCodes::UNSUPPORTED_METHOD); + + auto buffer = std::make_unique( + client.get(), + path, + settings.get()->max_single_part_upload_size, + buf_size); + + return std::make_unique(std::move(buffer), std::move(finalize_callback), path); +} + +void AzureObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + auto client_ptr = client.get(); + + Azure::Storage::Blobs::ListBlobsOptions blobs_list_options; + blobs_list_options.Prefix = path; + + auto blobs_list_response = client_ptr->ListBlobs(blobs_list_options); + auto blobs_list = blobs_list_response.Blobs; + + for (const auto & blob : blobs_list) + children.emplace_back(blob.Name, blob.BlobSize); +} + +/// Remove file. Throws exception if file doesn't exists or it's a directory. +void AzureObjectStorage::removeObject(const std::string & path) +{ + auto client_ptr = client.get(); + auto delete_info = client_ptr->DeleteBlob(path); + if (!delete_info.Value.Deleted) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); +} + +void AzureObjectStorage::removeObjects(const std::vector & paths) +{ + auto client_ptr = client.get(); + for (const auto & path : paths) + { + auto delete_info = client_ptr->DeleteBlob(path); + if (!delete_info.Value.Deleted) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); + } +} + +void AzureObjectStorage::removeObjectIfExists(const std::string & path) +{ + auto client_ptr = client.get(); + auto delete_info = client_ptr->DeleteBlob(path); +} + +void AzureObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + auto client_ptr = client.get(); + for (const auto & path : paths) + auto delete_info = client_ptr->DeleteBlob(path); +} + + +ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const +{ + auto client_ptr = client.get(); + auto blob_client = client_ptr->GetBlobClient(path); + auto properties = blob_client.GetProperties().Value; + ObjectMetadata result; + result.size_bytes = properties.BlobSize; + if (!properties.Metadata.empty()) + { + result.attributes.emplace(); + for (const auto & [key, value] : properties.Metadata) + (*result.attributes)[key] = value; + } + result.last_modified.emplace(properties.LastModified.time_since_epoch().count()); + return result; +} + +void AzureObjectStorage::copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes) +{ + auto client_ptr = client.get(); + auto dest_blob_client = client_ptr->GetBlobClient(object_to); + auto source_blob_client = client_ptr->GetBlobClient(object_from); + Azure::Storage::Blobs::CopyBlobFromUriOptions copy_options; + if (object_to_attributes.has_value()) + { + for (const auto & [key, value] : *object_to_attributes) + copy_options.Metadata[key] = value; + } + + dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); +} + +void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_client = 
getAzureBlobContainerClient(config, config_prefix); + + client.set(std::move(new_client)); + settings.set(std::move(new_settings)); +} + + +std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + return std::make_unique( + nullptr, + name, + getAzureBlobContainerClient(config, config_prefix), + getAzureBlobStorageSettings(config, config_prefix, context) + ); +} + +} diff --git a/src/Disks/AzureObjectStorage.h b/src/Disks/AzureObjectStorage.h new file mode 100644 index 00000000000..da6393fd55d --- /dev/null +++ b/src/Disks/AzureObjectStorage.h @@ -0,0 +1,113 @@ +#pragma once +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct AzureObjectStorageSettings +{ + AzureObjectStorageSettings( + uint64_t max_single_part_upload_size_, + uint64_t min_bytes_for_seek_, + int max_single_read_retries_, + int max_single_download_retries_) + : max_single_part_upload_size(max_single_part_upload_size_) + , min_bytes_for_seek(min_bytes_for_seek_) + , max_single_read_retries(max_single_read_retries_) + , max_single_download_retries(max_single_download_retries_) + { + } + + size_t max_single_part_upload_size; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + uint64_t min_bytes_for_seek; + size_t max_single_read_retries; + size_t max_single_download_retries; +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +class AzureObjectStorage : public IObjectStorage +{ +public: + + using SettingsPtr = std::unique_ptr; + + AzureObjectStorage( + FileCachePtr && cache_, + const String & name_, + AzureClientPtr && client_, + SettingsPtr && settings_); + + bool exists(const std::string & uri) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. 
+ void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; + + void shutdown() override {} + + void startup() override {} + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + String getObjectsNamespace() const override { return ""; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + +private: + const String name; + /// client used to access the files in the Blob Storage cloud + MultiVersion client; + MultiVersion settings; +}; + +} + +#endif diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index bfec350caba..31ae7dc575f 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -875,7 +875,7 @@ void DiskObjectStorageMetadataHelper::findLastRevision() LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); } -int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) { const std::string path = source_path + SCHEMA_VERSION_OBJECT; int version = 0; diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index f1687fe19b6..76a0191ade5 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -298,7 +298,7 @@ public: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; void findLastRevision(); - int readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const; + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 903caf705c5..b1bba40026a 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -5,8 +5,10 @@ namespace DB { + namespace ErrorCodes -{extern const int DEADLOCK_AVOIDED; +{ + extern const int DEADLOCK_AVOIDED; } using Millis = std::chrono::milliseconds; diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index cbd89bcca88..9e99e7aa820 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -35,7 +35,6 @@ bool HDFSObjectStorage::exists(const std::string & hdfs_uri) const const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); - } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -72,9 +71,9 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL throw 
Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); /// Single O_WRONLY in libhdfs adds O_TRUNC - auto hdfs_buffer = std::make_unique(path, - config, settings->replication, buf_size, - mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); + auto hdfs_buffer = std::make_unique( + path, config, settings->replication, buf_size, + mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique(std::move(hdfs_buffer), std::move(finalize_callback), path); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 509b73da5d4..ba477ced601 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -146,7 +146,7 @@ class ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFS { public: ReadBufferFromAzureBlobStorageGather( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const std::string & common_path_prefix_, const BlobsPathToSize & blobs_to_read_, size_t max_single_read_retries_, @@ -162,7 +162,7 @@ public: SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) override; private: - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; size_t max_single_read_retries; size_t max_single_download_retries; }; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index fda1a1f51b0..5da49be12e4 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -80,7 +80,6 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - ObjectStoragePtr s3_storage = std::make_unique( std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), diff --git a/src/IO/ReadBufferFromAzureBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp index 41cec694786..2576b10f9ac 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, size_t max_single_download_retries_, diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 80078afd6d0..b7459ccead1 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -17,8 +17,8 @@ class ReadBufferFromAzureBlobStorage : public SeekableReadBuffer, public WithFil { public: - explicit ReadBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + ReadBufferFromAzureBlobStorage( + std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, size_t max_single_download_retries_, @@ -41,7 +41,7 @@ private: void initialize(); std::unique_ptr data_stream; - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; std::unique_ptr blob_client; const String path; diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index eef1c8108fa..18e03b08817 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -12,14 +12,18 @@ namespace DB { 
WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, - size_t buf_size_) : - BufferWithOwnMemory(buf_size_, nullptr, 0), - blob_container_client(blob_container_client_), - max_single_part_upload_size(max_single_part_upload_size_), - blob_path(blob_path_) {} + size_t buf_size_, + std::optional> attributes_) + : BufferWithOwnMemory(buf_size_, nullptr, 0) + , blob_container_client(blob_container_client_) + , max_single_part_upload_size(max_single_part_upload_size_) + , blob_path(blob_path_) + , attributes(attributes_) +{ +} WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() @@ -29,6 +33,15 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() void WriteBufferFromAzureBlobStorage::finalizeImpl() { + if (attributes.has_value()) + { + auto blob_client = blob_container_client->GetBlobClient(blob_path); + Azure::Storage::Metadata metadata; + for (const auto & [key, value] : *attributes) + metadata[key] = value; + blob_client.SetMetadata(metadata); + } + const size_t max_tries = 3; for (size_t i = 0; i < max_tries; ++i) { diff --git a/src/IO/WriteBufferFromAzureBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h index 75336c497eb..ef13a24abd8 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -19,11 +19,12 @@ class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory { public: - explicit WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + WriteBufferFromAzureBlobStorage( + std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, - size_t buf_size_); + size_t buf_size_, + std::optional> attributes_ = {}); ~WriteBufferFromAzureBlobStorage() override; @@ -32,9 +33,10 @@ public: private: void finalizeImpl() override; - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; size_t max_single_part_upload_size; const String blob_path; + std::optional> attributes; }; } From 92c15ec97c09284bc90bb172a6b835b52f9867f5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 20:07:15 +0200 Subject: [PATCH 039/150] Get rid of IDiskRemote --- .../AzureBlobStorage/AzureBlobStorageAuth.h | 1 - src/Disks/DiskObjectStorage.cpp | 1 + src/Disks/DiskWebServer.cpp | 9 +- src/Disks/DiskWebServer.h | 5 +- src/Disks/IDiskObjectStorage.h | 8 - src/Disks/IDiskRemote.cpp | 702 ------------------ src/Disks/IDiskRemote.h | 302 -------- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 1 - src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/Disks/IO/ReadIndirectBufferFromRemoteFS.h | 1 - src/Disks/IO/ThreadPoolRemoteFSReader.h | 1 - .../IO/WriteIndirectBufferFromRemoteFS.h | 1 - src/Interpreters/Context.cpp | 4 +- src/Storages/MergeTree/DataPartsExchange.cpp | 1 - src/Storages/System/StorageSystemDisks.cpp | 1 - 15 files changed, 15 insertions(+), 1025 deletions(-) delete mode 100644 src/Disks/IDiskObjectStorage.h delete mode 100644 src/Disks/IDiskRemote.cpp delete mode 100644 src/Disks/IDiskRemote.h diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 32d3ca9945a..fcd4fd51b49 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -4,7 +4,6 @@ #if USE_AZURE_BLOB_STORAGE -#include #include #include diff --git 
a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 31ae7dc575f..4b0134b2d07 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB { diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 576ded94b01..4f1fc1ad8fb 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -9,8 +9,13 @@ #include #include -#include +#include +#include +#include + #include + + #include #include #include @@ -173,7 +178,7 @@ std::unique_ptr DiskWebServer::readFile(const String & p if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = IDiskRemote::getThreadPoolReader(); + auto reader = IObjectStorage::getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek); } else diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index dd699921f7c..47042fabc3d 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -1,10 +1,13 @@ #pragma once -#include #include #include #include +#include +#include +#include + namespace DB { diff --git a/src/Disks/IDiskObjectStorage.h b/src/Disks/IDiskObjectStorage.h deleted file mode 100644 index 90794301e54..00000000000 --- a/src/Disks/IDiskObjectStorage.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -} diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp deleted file mode 100644 index d72d7004cb7..00000000000 --- a/src/Disks/IDiskRemote.cpp +++ /dev/null @@ -1,702 +0,0 @@ -#include - -#include "Disks/DiskFactory.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INCORRECT_DISK_INDEX; - extern const int UNKNOWN_FORMAT; - extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; - extern const int FILE_DOESNT_EXIST; - extern const int BAD_FILE_TYPE; -} - - -IDiskRemote::Metadata IDiskRemote::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - return result; -} - - -IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - updater(result); - result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & 
metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - metadata_disk_->removeFile(metadata_file_path_); - - return result; - -} - -IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) -{ - if (overwrite || !metadata_disk_->exists(metadata_file_path_)) - { - return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); - } - else - { - auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - if (result.read_only) - throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); - return result; - } -} - -void IDiskRemote::Metadata::load() -{ - const ReadSettings read_settings; - auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ - - UInt32 version; - readIntText(version, *buf); - - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT, - "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", - metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); - - assertChar('\n', *buf); - - UInt32 remote_fs_objects_count; - readIntText(remote_fs_objects_count, *buf); - assertChar('\t', *buf); - readIntText(total_size, *buf); - assertChar('\n', *buf); - remote_fs_objects.resize(remote_fs_objects_count); - - for (size_t i = 0; i < remote_fs_objects_count; ++i) - { - String remote_fs_object_path; - size_t remote_fs_object_size; - readIntText(remote_fs_object_size, *buf); - assertChar('\t', *buf); - readEscapedString(remote_fs_object_path, *buf); - if (version == VERSION_ABSOLUTE_PATHS) - { - if (!remote_fs_object_path.starts_with(remote_fs_root_path)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", - remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); - - remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); - } - assertChar('\n', *buf); - remote_fs_objects[i].relative_path = remote_fs_object_path; - remote_fs_objects[i].bytes_size = remote_fs_object_size; - } - - readIntText(ref_count, *buf); - assertChar('\n', *buf); - - if (version >= VERSION_READ_ONLY_FLAG) - { - readBoolText(read_only, *buf); - assertChar('\n', *buf); - } -} - -/// Load metadata by path or create empty if `create` flag is set. 
-IDiskRemote::Metadata::Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_) - : remote_fs_root_path(remote_fs_root_path_) - , metadata_file_path(metadata_file_path_) - , metadata_disk(metadata_disk_) -{ -} - -void IDiskRemote::Metadata::addObject(const String & path, size_t size) -{ - total_size += size; - remote_fs_objects.emplace_back(path, size); -} - - -void IDiskRemote::Metadata::saveToBuffer(WriteBuffer & buf, bool sync) -{ - writeIntText(VERSION_RELATIVE_PATHS, buf); - writeChar('\n', buf); - - writeIntText(remote_fs_objects.size(), buf); - writeChar('\t', buf); - writeIntText(total_size, buf); - writeChar('\n', buf); - - for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) - { - writeIntText(remote_fs_object_size, buf); - writeChar('\t', buf); - writeEscapedString(remote_fs_object_path, buf); - writeChar('\n', buf); - } - - writeIntText(ref_count, buf); - writeChar('\n', buf); - - writeBoolText(read_only, buf); - writeChar('\n', buf); - - buf.finalize(); - if (sync) - buf.sync(); - -} - -/// Fsync metadata file if 'sync' flag is set. -void IDiskRemote::Metadata::save(bool sync) -{ - auto buf = metadata_disk->writeFile(metadata_file_path, 1024); - saveToBuffer(*buf, sync); -} - -std::string IDiskRemote::Metadata::serializeToString() -{ - WriteBufferFromOwnString write_buf; - saveToBuffer(write_buf, false); - return write_buf.str(); -} - -IDiskRemote::Metadata IDiskRemote::readMetadataUnlocked(const String & path, std::shared_lock &) const -{ - return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path); -} - - -IDiskRemote::Metadata IDiskRemote::readMetadata(const String & path) const -{ - std::shared_lock lock(metadata_mutex); - return readMetadataUnlocked(path, lock); -} - -IDiskRemote::Metadata IDiskRemote::readUpdateAndStoreMetadata(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - std::unique_lock lock(metadata_mutex); - return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); -} - - -IDiskRemote::Metadata IDiskRemote::readUpdateStoreMetadataAndRemove(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - std::unique_lock lock(metadata_mutex); - return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater); -} - -IDiskRemote::Metadata IDiskRemote::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, IDiskRemote::MetadataUpdater updater) -{ - if (mode == WriteMode::Rewrite || !metadata_disk->exists(path)) - { - std::unique_lock lock(metadata_mutex); - return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); - } - else - { - return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); - } -} - -IDiskRemote::Metadata IDiskRemote::createAndStoreMetadata(const String & path, bool sync) -{ - return Metadata::createAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync); -} - -IDiskRemote::Metadata IDiskRemote::createUpdateAndStoreMetadata(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); -} - - -std::unordered_map IDiskRemote::getSerializedMetadata(const std::vector & file_paths) const -{ - std::unordered_map metadatas; - - std::shared_lock lock(metadata_mutex); - - for (const auto & path : 
file_paths) - { - IDiskRemote::Metadata metadata = readMetadataUnlocked(path, lock); - metadata.ref_count = 0; - metadatas[path] = metadata.serializeToString(); - } - - return metadatas; -} - -void IDiskRemote::removeMetadata(const String & path, std::vector & paths_to_remove) -{ - LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); - - if (!metadata_disk->exists(path)) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path); - - if (!metadata_disk->isFile(path)) - throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path); - - try - { - auto metadata_updater = [&paths_to_remove, this] (Metadata & metadata) - { - if (metadata.ref_count == 0) - { - for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) - { - - paths_to_remove.push_back(remote_fs_root_path + remote_fs_object_path); - - if (cache) - { - auto key = cache->hash(remote_fs_object_path); - cache->remove(key); - } - } - - return false; - } - else /// In other case decrement number of references, save metadata and delete hardlink. - { - --metadata.ref_count; - } - - return true; - }; - - readUpdateStoreMetadataAndRemove(path, false, metadata_updater); - /// If there is no references - delete content from remote FS. - } - catch (const Exception & e) - { - /// If it's impossible to read meta - just remove it from FS. - if (e.code() == ErrorCodes::UNKNOWN_FORMAT) - { - LOG_WARNING(log, - "Metadata file {} can't be read by reason: {}. Removing it forcibly.", - backQuote(path), e.nested() ? e.nested()->message() : e.message()); - metadata_disk->removeFile(path); - } - else - throw; - } -} - - -void IDiskRemote::removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - - if (metadata_disk->isFile(path)) - { - removeMetadata(path, paths_to_remove[path]); - } - else - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - removeMetadataRecursive(it->path(), paths_to_remove); - - metadata_disk->removeDirectory(path); - } -} - -std::vector IDiskRemote::getRemotePaths(const String & local_path) const -{ - auto metadata = readMetadata(local_path); - - std::vector remote_paths; - for (const auto & [remote_path, _] : metadata.remote_fs_objects) - remote_paths.push_back(fs::path(metadata.remote_fs_root_path) / remote_path); - - return remote_paths; -} - -void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) -{ - /// Protect against concurrent delition of files (for example because of a merge). 
- if (metadata_disk->isFile(local_path)) - { - try - { - paths_map.emplace_back(local_path, getRemotePaths(local_path)); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) - return; - throw; - } - } - else - { - DiskDirectoryIteratorPtr it; - try - { - it = iterateDirectory(local_path); - } - catch (const fs::filesystem_error & e) - { - if (e.code() == std::errc::no_such_file_or_directory) - return; - throw; - } - - for (; it->isValid(); it->next()) - IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); - } -} - -DiskPtr DiskRemoteReservation::getDisk(size_t i) const -{ - if (i != 0) - throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); - return disk; -} - -void DiskRemoteReservation::update(UInt64 new_size) -{ - std::lock_guard lock(disk->reservation_mutex); - disk->reserved_bytes -= size; - size = new_size; - disk->reserved_bytes += size; -} - - -DiskRemoteReservation::~DiskRemoteReservation() -{ - try - { - std::lock_guard lock(disk->reservation_mutex); - if (disk->reserved_bytes < size) - { - disk->reserved_bytes = 0; - LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); - } - else - { - disk->reserved_bytes -= size; - } - - if (disk->reservation_count == 0) - LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); - else - --disk->reservation_count; - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - -IDiskRemote::IDiskRemote( - const String & name_, - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - const String & log_name_, - size_t thread_pool_size) - : IDisk(std::make_unique(log_name_, thread_pool_size)) - , log(&Poco::Logger::get(log_name_)) - , name(name_) - , remote_fs_root_path(remote_fs_root_path_) - , metadata_disk(metadata_disk_) - , cache(cache_) -{ -} - - -String IDiskRemote::getCacheBasePath() const -{ - return cache ? 
cache->getBasePath() : ""; -} - - -bool IDiskRemote::exists(const String & path) const -{ - return metadata_disk->exists(path); -} - - -bool IDiskRemote::isFile(const String & path) const -{ - return metadata_disk->isFile(path); -} - - -void IDiskRemote::createFile(const String & path) -{ - createAndStoreMetadata(path, false); -} - - -size_t IDiskRemote::getFileSize(const String & path) const -{ - return readMetadata(path).total_size; -} - - -void IDiskRemote::moveFile(const String & from_path, const String & to_path) -{ - if (exists(to_path)) - throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); - - metadata_disk->moveFile(from_path, to_path); -} - - -void IDiskRemote::replaceFile(const String & from_path, const String & to_path) -{ - if (exists(to_path)) - { - const String tmp_path = to_path + ".old"; - moveFile(to_path, tmp_path); - moveFile(from_path, to_path); - removeFile(tmp_path); - } - else - moveFile(from_path, to_path); -} - -void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_only) -{ - std::vector paths_to_remove; - removeMetadata(path, paths_to_remove); - - if (!delete_metadata_only) - removeFromRemoteFS(paths_to_remove); -} - -void IDiskRemote::removeSharedFileIfExists(const String & path, bool delete_metadata_only) -{ - std::vector paths_to_remove; - if (metadata_disk->exists(path)) - { - removeMetadata(path, paths_to_remove); - if (!delete_metadata_only) - removeFromRemoteFS(paths_to_remove); - } -} - -void IDiskRemote::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - std::unordered_map> paths_to_remove; - for (const auto & file : files) - { - bool skip = file.if_exists && !metadata_disk->exists(file.path); - if (!skip) - removeMetadata(file.path, paths_to_remove[file.path]); - } - - if (!keep_all_batch_data) - { - std::vector remove_from_remote; - for (auto && [path, remote_paths] : paths_to_remove) - { - if (!file_names_remove_metadata_only.contains(fs::path(path).filename())) - remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); - } - removeFromRemoteFS(remove_from_remote); - } -} - -void IDiskRemote::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - std::unordered_map> paths_to_remove; - removeMetadataRecursive(path, paths_to_remove); - - if (!keep_all_batch_data) - { - std::vector remove_from_remote; - for (auto && [local_path, remote_paths] : paths_to_remove) - { - if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) - remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); - } - removeFromRemoteFS(remove_from_remote); - } -} - - -void IDiskRemote::setReadOnly(const String & path) -{ - /// We should store read only flag inside metadata file (instead of using FS flag), - /// because we modify metadata file when create hard-links from it. 
- readUpdateAndStoreMetadata(path, false, [] (Metadata & metadata) { metadata.read_only = true; return true; }); -} - - -bool IDiskRemote::isDirectory(const String & path) const -{ - return metadata_disk->isDirectory(path); -} - - -void IDiskRemote::createDirectory(const String & path) -{ - metadata_disk->createDirectory(path); -} - - -void IDiskRemote::createDirectories(const String & path) -{ - metadata_disk->createDirectories(path); -} - - -void IDiskRemote::clearDirectory(const String & path) -{ - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (isFile(it->path())) - removeFile(it->path()); -} - - -void IDiskRemote::removeDirectory(const String & path) -{ - metadata_disk->removeDirectory(path); -} - - -DiskDirectoryIteratorPtr IDiskRemote::iterateDirectory(const String & path) -{ - return metadata_disk->iterateDirectory(path); -} - - -void IDiskRemote::listFiles(const String & path, std::vector & file_names) -{ - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - file_names.push_back(it->name()); -} - - -void IDiskRemote::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - metadata_disk->setLastModified(path, timestamp); -} - - -Poco::Timestamp IDiskRemote::getLastModified(const String & path) -{ - return metadata_disk->getLastModified(path); -} - - -void IDiskRemote::createHardLink(const String & src_path, const String & dst_path) -{ - readUpdateAndStoreMetadata(src_path, false, [] (Metadata & metadata) { metadata.ref_count++; return true; }); - - /// Create FS hardlink to metadata file. - metadata_disk->createHardLink(src_path, dst_path); -} - - -ReservationPtr IDiskRemote::reserve(UInt64 bytes) -{ - if (!tryReserve(bytes)) - return {}; - - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); -} - - -bool IDiskRemote::tryReserve(UInt64 bytes) -{ - std::lock_guard lock(reservation_mutex); - if (bytes == 0) - { - LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); - ++reservation_count; - return true; - } - - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); - if (unreserved_space >= bytes) - { - LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", - ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); - ++reservation_count; - reserved_bytes += bytes; - return true; - } - return false; -} - -String IDiskRemote::getUniqueId(const String & path) const -{ - LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path); - auto metadata = readMetadata(path); - String id; - if (!metadata.remote_fs_objects.empty()) - id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path; - return id; -} - - -AsynchronousReaderPtr IDiskRemote::getThreadPoolReader() -{ - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; - static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); - return reader; -} - -UInt32 IDiskRemote::getRefCount(const String & path) const -{ - return readMetadata(path).ref_count; -} - -ThreadPool & IDiskRemote::getThreadPoolWriter() -{ - constexpr size_t pool_size = 100; - constexpr size_t queue_size = 1000000; - static ThreadPool writer(pool_size, pool_size, queue_size); - return writer; -} - -} diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h deleted file mode 100644 index 327452c0bbf..00000000000 --- a/src/Disks/IDiskRemote.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - 
-#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric DiskSpaceReservedForMerge; -} - -namespace DB -{ - -class IAsynchronousReader; -using AsynchronousReaderPtr = std::shared_ptr; - - -/// Base Disk class for remote FS's, which are not posix-compatible (e.g. DiskS3, DiskHDFS, DiskBlobStorage) -class IDiskRemote : public IDisk -{ - -friend class DiskRemoteReservation; - -public: - IDiskRemote( - const String & name_, - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - const String & log_name_, - size_t thread_pool_size); - - struct Metadata; - using MetadataUpdater = std::function; - - const String & getName() const final override { return name; } - - const String & getPath() const final override { return metadata_disk->getPath(); } - - String getCacheBasePath() const final override; - - std::vector getRemotePaths(const String & local_path) const final override; - - void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; - - /// Methods for working with metadata. For some operations (like hardlink - /// creation) metadata can be updated concurrently from multiple threads - /// (file actually rewritten on disk). So additional RW lock is required for - /// metadata read and write, but not for create new metadata. - Metadata readMetadata(const String & path) const; - Metadata readMetadataUnlocked(const String & path, std::shared_lock &) const; - Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); - Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater); - - Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater); - - Metadata createAndStoreMetadata(const String & path, bool sync); - Metadata createUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); - - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } - - UInt64 getKeepingFreeSpace() const override { return 0; } - - bool exists(const String & path) const override; - - bool isFile(const String & path) const override; - - void createFile(const String & path) override; - - size_t getFileSize(const String & path) const override; - - void moveFile(const String & from_path, const String & to_path) override; - - void replaceFile(const String & from_path, const String & to_path) override; - - void removeFile(const String & path) override { removeSharedFile(path, false); } - - void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); } - - void removeRecursive(const String & path) override { removeSharedRecursive(path, false, {}); } - - - void removeSharedFile(const String & path, bool delete_metadata_only) override; - - void removeSharedFileIfExists(const String & path, bool delete_metadata_only) override; - - void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - - void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - - void listFiles(const String & path, std::vector & file_names) override; - - void 
setReadOnly(const String & path) override; - - bool isDirectory(const String & path) const override; - - void createDirectory(const String & path) override; - - void createDirectories(const String & path) override; - - void clearDirectory(const String & path) override; - - void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } - - void removeDirectory(const String & path) override; - - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; - - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - - Poco::Timestamp getLastModified(const String & path) override; - - void createHardLink(const String & src_path, const String & dst_path) override; - - ReservationPtr reserve(UInt64 bytes) override; - - String getUniqueId(const String & path) const override; - - bool checkUniqueId(const String & id) const override = 0; - - virtual void removeFromRemoteFS(const std::vector & paths) = 0; - - static AsynchronousReaderPtr getThreadPoolReader(); - - static ThreadPool & getThreadPoolWriter(); - - DiskPtr getMetadataDiskIfExistsOrSelf() override { return metadata_disk; } - - UInt32 getRefCount(const String & path) const override; - - /// Return metadata for each file path. Also, before serialization reset - /// ref_count for each metadata to zero. This function used only for remote - /// fetches/sends in replicated engines. That's why we reset ref_count to zero. - std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; -protected: - Poco::Logger * log; - const String name; - const String remote_fs_root_path; - - DiskPtr metadata_disk; - - FileCachePtr cache; - -private: - void removeMetadata(const String & path, std::vector & paths_to_remove); - - void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); - - bool tryReserve(UInt64 bytes); - - UInt64 reserved_bytes = 0; - UInt64 reservation_count = 0; - std::mutex reservation_mutex; - mutable std::shared_mutex metadata_mutex; -}; - -using RemoteDiskPtr = std::shared_ptr; - -/// Remote FS (S3, HDFS) metadata file layout: -/// FS objects, their number and total size of all FS objects. -/// Each FS object represents a file path in remote FS and its size. - -struct IDiskRemote::Metadata -{ - using Updater = std::function; - /// Metadata file version. - static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; - static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; - static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; - - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; - - /// URI - const String & remote_fs_root_path; - - /// Relative path to metadata file on local FS. - const String metadata_file_path; - - DiskPtr metadata_disk; - - /// Total size of all remote FS (S3, HDFS) objects. - size_t total_size = 0; - - /// Number of references (hardlinks) to this metadata file. - /// - /// FIXME: Why we are tracking it explicetly, without - /// info from filesystem???? - UInt32 ref_count = 0; - - /// Flag indicates that file is read only. 
- bool read_only = false; - - Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_); - - void addObject(const String & path, size_t size); - - static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); - static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - - static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); - static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); - - /// Serialize metadata to string (very same with saveToBuffer) - std::string serializeToString(); - -private: - /// Fsync metadata file if 'sync' flag is set. - void save(bool sync = false); - void saveToBuffer(WriteBuffer & buffer, bool sync); - void load(); -}; - -class DiskRemoteReservation final : public IReservation -{ -public: - DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) - { - } - - UInt64 getSize() const override { return size; } - - DiskPtr getDisk(size_t i) const override; - - Disks getDisks() const override { return {disk}; } - - void update(UInt64 new_size) override; - - ~DiskRemoteReservation() override; - -private: - RemoteDiskPtr disk; - UInt64 size; - CurrentMetrics::Increment metric_increment; -}; - - -/// Runs tasks asynchronously using thread pool. -class AsyncExecutor : public Executor -{ -public: - explicit AsyncExecutor(const String & name_, int thread_pool_size) - : name(name_) - , pool(ThreadPool(thread_pool_size)) {} - - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - pool.scheduleOrThrowOnError( - [promise, task]() - { - try - { - task(); - promise->set_value(); - } - catch (...) - { - tryLogCurrentException("Failed to run async task"); - - try - { - promise->set_exception(std::current_exception()); - } - catch (...) 
{} - } - }); - - return promise->get_future(); - } - - void setMaxThreads(size_t threads) - { - pool.setMaxThreads(threads); - } - -private: - String name; - ThreadPool pool; -}; - -} diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 57b72d0190d..c2a317b43b0 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -1,6 +1,5 @@ #include "ReadBufferFromRemoteFSGather.h" -#include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index ba477ced601..e7eb6296a19 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -1,9 +1,9 @@ #pragma once #include -#include #include #include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index a0669be411f..64495a538e4 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -2,7 +2,6 @@ #include #include -#include #include diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index b2d5f11724a..0d5513e4c01 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h index 84bd2b99c7e..38a1872bb45 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h @@ -2,7 +2,6 @@ #include -#include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 34f396b978c..5f725d92447 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -313,7 +313,7 @@ struct ContextSharedPart /// since it may use per-user MemoryTracker which will be destroyed here. try { - IDiskRemote::getThreadPoolWriter().wait(); + IObjectStorage::getThreadPoolWriter().wait(); } catch (...) 
{ diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 064447c54ad..09c8fe3a6ab 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index fb6a055c6e5..5cc79c1ceee 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB { From e278bfa81ece15e38c7a9b7c056b66e81e5131db Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 21:03:49 +0200 Subject: [PATCH 040/150] Fix fast test build --- src/Disks/AzureObjectStorage.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/AzureObjectStorage.cpp index 68f7f63638a..7118d3e0c01 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/AzureObjectStorage.cpp @@ -1,4 +1,7 @@ #include + +#if USE_AZURE_BLOB_STORAGE + #include #include #include @@ -211,3 +214,5 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std } } + +#endif From 6ff221a822b654025a518b9df4abe1c217f57cc6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 12:07:34 +0200 Subject: [PATCH 041/150] Merge with master --- src/Disks/DiskObjectStorage.cpp | 15 +++++++++------ src/Disks/DiskObjectStorage.h | 5 ++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 4b0134b2d07..012a6d5b4c9 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -720,27 +720,30 @@ void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all } } -bool DiskObjectStorage::tryReserve(UInt64 bytes) +std::optional DiskObjectStorage::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } std::unique_ptr DiskObjectStorage::readFile( diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 5ae014db6ca..7ddd3fa6798 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -253,7 +253,10 @@ class DiskObjectStorageReservation final : public IReservation { public: DiskObjectStorageReservation(const std::shared_ptr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + : disk(disk_) + , size(size_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + {} UInt64 getSize() const override { return size; } From dc4bc2908eb2c5fe67928645ed5bf3fef80588e2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 15:14:01 +0200 Subject: [PATCH 042/150] 
Fix azure --- src/Disks/AzureObjectStorage.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/AzureObjectStorage.cpp index 7118d3e0c01..75a602760a7 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/AzureObjectStorage.cpp @@ -196,10 +196,9 @@ void AzureObjectStorage::copyObject( /// NOLINT void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); - auto new_client = getAzureBlobContainerClient(config, config_prefix); - - client.set(std::move(new_client)); settings.set(std::move(new_settings)); + + /// We don't update client } From d414d85654863b92de376339aa30d84d8efdff3d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 17:43:48 +0200 Subject: [PATCH 043/150] Fix hdfs bug --- src/Disks/DiskObjectStorage.cpp | 8 ++++++- src/Disks/HDFSObjectStorage.cpp | 21 ++----------------- .../test.py | 8 +++---- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 012a6d5b4c9..73ee5448b2a 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -500,6 +500,9 @@ String DiskObjectStorage::getUniqueId(const String & path) const bool DiskObjectStorage::checkObjectExists(const String & path) const { + if (!path.starts_with(remote_fs_root_path)) + return false; + return object_storage->exists(path); } @@ -714,7 +717,9 @@ void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all for (auto && [local_path, remote_paths] : paths_to_remove) { if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) + { remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); + } } removeFromRemoteFS(remove_from_remote); } @@ -763,6 +768,7 @@ std::unique_ptr DiskObjectStorage::writeFile( const WriteSettings & settings) { auto blob_name = getRandomASCIIString(); + auto blob_path = fs::path(remote_fs_root_path) / blob_name; std::optional object_attributes; if (send_metadata) @@ -781,7 +787,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(blob_path, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); } diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index 9e99e7aa820..5a1a70f6a50 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -106,14 +106,7 @@ void HDFSObjectStorage::removeObject(const std::string & path) void HDFSObjectStorage::removeObjects(const std::vector & paths) { for (const auto & hdfs_path : paths) - { - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } + removeObject(hdfs_path); } void HDFSObjectStorage::removeObjectIfExists(const std::string & path) @@ -125,17 +118,7 @@ void 
HDFSObjectStorage::removeObjectIfExists(const std::string & path) void HDFSObjectStorage::removeObjectsIfExist(const std::vector & paths) { for (const auto & hdfs_path : paths) - { - if (!exists(hdfs_path)) - continue; - - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } + removeObjectIfExists(hdfs_path); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 1e34a924e39..23f465eaabd 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -77,7 +77,7 @@ def test_hdfs_zero_copy_replication_insert(cluster): ) node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") - node2.query("SYSTEM SYNC REPLICA hdfs_test") + node2.query("SYSTEM SYNC REPLICA hdfs_test", timeout=30) assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" assert ( @@ -192,7 +192,7 @@ def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): node1.query( "INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" ) - node2.query("SYSTEM SYNC REPLICA move_test") + node2.query("SYSTEM SYNC REPLICA move_test", timeout=30) assert ( node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") @@ -262,7 +262,7 @@ def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test") + node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" @@ -297,7 +297,7 @@ def test_hdfs_zero_copy_with_ttl_delete(cluster): node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test") + node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" From 5b08edefd15183b0c5de458e3a3e891c0372ce9e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 20:07:15 +0200 Subject: [PATCH 044/150] Fix I don't understand --- src/Disks/DiskObjectStorage.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 73ee5448b2a..750a009ecf9 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -768,7 +768,6 @@ std::unique_ptr DiskObjectStorage::writeFile( const WriteSettings & settings) { auto blob_name = getRandomASCIIString(); - auto blob_path = fs::path(remote_fs_root_path) / blob_name; std::optional object_attributes; if (send_metadata) @@ -787,7 +786,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] 
(DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(blob_path, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); } From c12f826d225f0b3a66287817c9d2a481f860fc79 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 21 May 2022 15:59:14 +0300 Subject: [PATCH 045/150] Implemented changing comment to a ReplicatedMergeTree table --- .../ReplicatedMergeTreeTableMetadata.cpp | 24 +++++++++++++++++-- .../ReplicatedMergeTreeTableMetadata.h | 6 ++++- src/Storages/StorageReplicatedMergeTree.cpp | 8 +++++++ ...2302_ReplicatedMergeTree_comment.reference | 3 +++ .../02302_ReplicatedMergeTree_comment.sql | 23 ++++++++++++++++++ 5 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference create mode 100644 tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 7dee7b8d0f8..393c2eb0dd1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -73,6 +73,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity_bytes = 0; constraints = metadata_snapshot->getConstraints().toString(); + comment = metadata_snapshot->comment; } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -108,6 +109,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!constraints.empty()) out << "constraints: " << constraints << "\n"; + + if (!comment.empty()) + out << "comment: " << quote << comment << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -155,8 +159,18 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) else index_granularity_bytes = 0; - if (checkString("constraints: ", in)) - in >> constraints >> "\n"; + String verb; + readStringUntilWhitespace(verb, in); + + if (verb == "constraints:") + { + in >> " " >> constraints >> "\n"; + + readStringUntilWhitespace(verb, in); + } + + if (verb == "comment:") + in >> " " >> quote >> comment >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -350,6 +364,12 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl diff.new_constraints = from_zk.constraints; } + if (comment != from_zk.comment) + { + diff.comment_changed = true; + diff.comment = from_zk.comment; + } + return diff; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 6d510d20304..246cf863d13 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -30,6 +30,7 @@ struct ReplicatedMergeTreeTableMetadata String projections; String constraints; String ttl_table; + String comment; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -61,10 +62,13 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; + bool comment_changed = false; + String comment; + bool empty() const { return !sorting_key_changed && 
!sampling_expression_changed && !skip_indices_changed && !projections_changed - && !ttl_table_changed && !constraints_changed; + && !ttl_table_changed && !constraints_changed && !comment_changed; } }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36080485aca..a72866d1dde 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1151,6 +1151,9 @@ void StorageReplicatedMergeTree::setTableStructure( new_metadata.table_ttl = TTLTableDescription{}; } } + + if (metadata_diff.comment_changed) + new_metadata.comment = metadata_diff.comment; } /// Changes in columns may affect following metadata fields @@ -4776,6 +4779,11 @@ void StorageReplicatedMergeTree::alter( future_metadata_in_zk.ttl_table = ""; } + if (future_metadata.comment != current_metadata->comment) + { + future_metadata_in_zk.comment = future_metadata.comment; + } + String new_indices_str = future_metadata.secondary_indices.toString(); if (new_indices_str != current_metadata->secondary_indices.toString()) future_metadata_in_zk.skip_indices = new_indices_str; diff --git a/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference new file mode 100644 index 00000000000..ea14c4d69b4 --- /dev/null +++ b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference @@ -0,0 +1,3 @@ +Comment text for test table +Some new more detailed text of comment + diff --git a/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql new file mode 100644 index 00000000000..282c90d24bf --- /dev/null +++ b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql @@ -0,0 +1,23 @@ +-- Validate that setting/removing and getting comments on ReplicatedMergeTree works +-- https://github.com/ClickHouse/ClickHouse/issues/36377 + + +CREATE TABLE 02302_ReplicatedMergeTree_comment +( + key UInt64 COMMENT 'The PK' +) +ENGINE = ReplicatedMergeTree('/test/02302_ReplicatedMergeTree_comment/{database}/source', '1') +PARTITION BY key +ORDER BY tuple() +COMMENT 'Comment text for test table'; + +# Check that comment is present +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; + +# Change to a different value and check if it was changed +ALTER TABLE 02302_ReplicatedMergeTree_comment MODIFY COMMENT 'Some new more detailed text of comment'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; + +# Remove the comment and check if it is empty now +ALTER TABLE 02302_ReplicatedMergeTree_comment MODIFY COMMENT ''; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; From b3bc0a18a0a7d1725f09474704afdf211fa73980 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 00:13:01 +0200 Subject: [PATCH 046/150] fix test --- src/Disks/DiskObjectStorage.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 750a009ecf9..4d8efdea4cb 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -786,7 +786,11 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return 
object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); + /// We always use mode Rewrite because we simulate append using metadata and different files + return object_storage->writeObject( + fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, + std::move(create_metadata_callback), + buf_size, settings); } From eb69d963e27b4f00d7d689a46d177a5103890dc8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 12:16:50 +0200 Subject: [PATCH 047/150] Missed change --- src/Disks/DiskObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 4d8efdea4cb..89eb5d4a63d 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; + extern const int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; extern const int MEMORY_LIMIT_EXCEEDED; From 06c3dd69c002342f60c02bd5a870b1ba7cbf7044 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 13:51:48 +0200 Subject: [PATCH 048/150] Move directories --- src/CMakeLists.txt | 7 +- src/Disks/DiskWebServer.cpp | 3 +- src/Disks/IDisk.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.h | 2 +- .../AzureBlobStorage}/AzureObjectStorage.cpp | 5 +- .../AzureBlobStorage}/AzureObjectStorage.h | 2 +- .../registerDiskAzureBlobStorage.cpp | 7 +- .../DiskObjectStorage.cpp | 522 +---------------- .../{ => ObjectStorages}/DiskObjectStorage.h | 66 +-- .../DiskObjectStorageMetadataHelper.cpp | 541 ++++++++++++++++++ .../DiskObjectStorageMetadataHelper.h | 77 +++ .../HDFS}/HDFSObjectStorage.cpp | 4 +- .../HDFS}/HDFSObjectStorage.h | 2 +- .../HDFS/registerDiskHDFS.cpp | 4 +- .../{ => ObjectStorages}/IObjectStorage.cpp | 3 +- .../{ => ObjectStorages}/IObjectStorage.h | 26 +- .../S3/ProxyConfiguration.h | 0 .../S3/ProxyListConfiguration.cpp | 0 .../S3/ProxyListConfiguration.h | 0 .../S3/ProxyResolverConfiguration.cpp | 0 .../S3/ProxyResolverConfiguration.h | 0 .../S3}/S3ObjectStorage.cpp | 6 +- .../{ => ObjectStorages/S3}/S3ObjectStorage.h | 2 +- .../{ => ObjectStorages}/S3/diskSettings.cpp | 2 +- .../{ => ObjectStorages}/S3/diskSettings.h | 8 +- .../{ => ObjectStorages}/S3/parseConfig.h | 0 .../S3/registerDiskS3.cpp | 13 +- src/Interpreters/Context.cpp | 2 +- 30 files changed, 689 insertions(+), 621 deletions(-) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/AzureBlobStorageAuth.cpp (98%) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/AzureBlobStorageAuth.h (87%) rename src/Disks/{ => ObjectStorages/AzureBlobStorage}/AzureObjectStorage.cpp (98%) rename src/Disks/{ => ObjectStorages/AzureBlobStorage}/AzureObjectStorage.h (98%) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/registerDiskAzureBlobStorage.cpp (95%) rename src/Disks/{ => ObjectStorages}/DiskObjectStorage.cpp (57%) rename src/Disks/{ => ObjectStorages}/DiskObjectStorage.h (79%) create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h rename src/Disks/{ => ObjectStorages/HDFS}/HDFSObjectStorage.cpp (98%) rename 
src/Disks/{ => ObjectStorages/HDFS}/HDFSObjectStorage.h (98%) rename src/Disks/{ => ObjectStorages}/HDFS/registerDiskHDFS.cpp (94%) rename src/Disks/{ => ObjectStorages}/IObjectStorage.cpp (96%) rename src/Disks/{ => ObjectStorages}/IObjectStorage.h (74%) rename src/Disks/{ => ObjectStorages}/S3/ProxyConfiguration.h (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyListConfiguration.cpp (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyListConfiguration.h (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyResolverConfiguration.cpp (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyResolverConfiguration.h (100%) rename src/Disks/{ => ObjectStorages/S3}/S3ObjectStorage.cpp (99%) rename src/Disks/{ => ObjectStorages/S3}/S3ObjectStorage.h (99%) rename src/Disks/{ => ObjectStorages}/S3/diskSettings.cpp (99%) rename src/Disks/{ => ObjectStorages}/S3/diskSettings.h (74%) rename src/Disks/{ => ObjectStorages}/S3/parseConfig.h (100%) rename src/Disks/{ => ObjectStorages}/S3/registerDiskS3.cpp (91%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8f6e894a100..10bdc464ac6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -87,6 +87,7 @@ add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) add_headers_and_sources(dbms Disks/IO) +add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -113,16 +114,16 @@ endif() if (TARGET ch_contrib::aws_s3) add_headers_and_sources(dbms Common/S3) - add_headers_and_sources(dbms Disks/S3) + add_headers_and_sources(dbms Disks/ObjectStorages/S3) endif() if (TARGET ch_contrib::azure_sdk) - add_headers_and_sources(dbms Disks/AzureBlobStorage) + add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage) endif() if (TARGET ch_contrib::hdfs) add_headers_and_sources(dbms Storages/HDFS) - add_headers_and_sources(dbms Disks/HDFS) + add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() add_headers_and_sources(dbms Storages/Cache) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 4f1fc1ad8fb..b8a0d12d6c1 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -10,12 +10,11 @@ #include #include -#include +#include #include #include - #include #include #include diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ab9f7abae1c..ce6cc84c3f3 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index e7eb6296a19..eb6d26a4c15 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp similarity index 98% rename from src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index c078f584a09..3dcdd47826f 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h 
b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h similarity index 87% rename from src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h index fcd4fd51b49..6ebe169af50 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h @@ -5,7 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include namespace DB { diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp similarity index 98% rename from src/Disks/AzureObjectStorage.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 75a602760a7..4ea7c609a51 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE @@ -6,7 +6,8 @@ #include #include #include -#include + +#include namespace DB { diff --git a/src/Disks/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h similarity index 98% rename from src/Disks/AzureObjectStorage.h rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index da6393fd55d..9012449e284 100644 --- a/src/Disks/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp similarity index 95% rename from src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index e111406a587..faaec6ee95c 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -7,9 +7,10 @@ #include #include #include -#include -#include -#include +#include + +#include +#include namespace DB { diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp similarity index 57% rename from src/Disks/DiskObjectStorage.cpp rename to src/Disks/ObjectStorages/DiskObjectStorage.cpp index 89eb5d4a63d..b1a396ffee6 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -14,7 +14,7 @@ #include #include #include - +#include #include namespace DB @@ -858,522 +858,4 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() } -void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const -{ - const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; - auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); - buf->write('0'); - buf->finalize(); -} - -void DiskObjectStorageMetadataHelper::findLastRevision() -{ - /// Construct revision number from high to low bits. - String revision; - revision.reserve(64); - for (int bit = 0; bit < 64; ++bit) - { - auto revision_prefix = revision + "1"; - - LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); - - /// Check file or operation with such revision prefix exists. 
- if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) - || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) - revision += "1"; - else - revision += "0"; - } - revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); - LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); -} - -int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) -{ - const std::string path = source_path + SCHEMA_VERSION_OBJECT; - int version = 0; - if (!object_storage->exists(path)) - return version; - - auto buf = object_storage->readObject(path); - readIntText(version, *buf); - - return version; -} - -void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const -{ - auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; - - auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); - writeIntText(version, *buf); - buf->finalize(); - -} - -void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const -{ - disk->object_storage->copyObject(key, key, metadata); -} - -void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const -{ - LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); - - auto meta = disk->readMetadata(path); - - for (const auto & [key, _] : meta.remote_fs_objects) - { - ObjectAttributes metadata { - {"path", path} - }; - updateObjectMetadata(disk->remote_fs_root_path + key, metadata); - } -} -void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - - LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); - - bool dir_contains_only_files = true; - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - { - if (disk->isDirectory(it->path())) - { - dir_contains_only_files = false; - break; - } - } - - /// The whole directory can be migrated asynchronously. 
- if (dir_contains_only_files) - { - auto result = disk->getExecutor().execute([this, path] - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); - - results.push_back(std::move(result)); - } - else - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - if (!disk->isDirectory(it->path())) - { - auto source_path = it->path(); - auto result = disk->getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); - } - else - migrateToRestorableSchemaRecursive(it->path(), results); - } - -} - -void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() -{ - try - { - LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); - - Futures results; - - for (const auto & root : data_roots) - if (disk->exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name)); - - throw; - } -} - -void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) -{ - LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); - - if (!disk->exists(RESTORE_FILE_NAME)) - { - LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME); - return; - } - - try - { - RestoreInformation information; - information.source_path = disk->remote_fs_root_path; - information.source_namespace = disk->object_storage->getObjectsNamespace(); - - readRestoreInformation(information); - if (information.revision == 0) - information.revision = LATEST_REVISION; - if (!information.source_path.ends_with('/')) - information.source_path += '/'; - - IObjectStorage * source_object_storage = disk->object_storage.get(); - if (information.source_namespace == disk->object_storage->getObjectsNamespace()) - { - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. - if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - - /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); - } - else - { - object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); - source_object_storage = object_storage_from_another_namespace.get(); - } - - LOG_INFO(disk->log, "Starting to restore disk {}. 
Revision: {}, Source path: {}", - disk->name, information.revision, information.source_path); - - if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) - throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); - - LOG_INFO(disk->log, "Removing old metadata..."); - - bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; - for (const auto & root : data_roots) - if (disk->exists(root)) - disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); - - LOG_INFO(disk->log, "Old metadata removed, restoring new one"); - restoreFiles(source_object_storage, information); - restoreFileOperations(source_object_storage, information); - - disk->metadata_disk->removeFile(RESTORE_FILE_NAME); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - - LOG_INFO(disk->log, "Restore disk {} finished", disk->name); - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); - - throw; - } -} - -void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT -{ - auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); - buffer->next(); - - try - { - std::map properties; - - while (buffer->hasPendingData()) - { - String property; - readText(property, *buffer); - assertChar('\n', *buffer); - - auto pos = property.find('='); - if (pos == std::string::npos || pos == 0 || pos == property.length()) - throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); - - auto key = property.substr(0, pos); - auto value = property.substr(pos + 1); - - auto it = properties.find(key); - if (it != properties.end()) - throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - - properties[key] = value; - } - - for (const auto & [key, value] : properties) - { - ReadBufferFromString value_buffer(value); - - if (key == "revision") - readIntText(restore_information.revision, value_buffer); - else if (key == "source_bucket" || key == "source_namespace") - readText(restore_information.source_namespace, value_buffer); - else if (key == "source_path") - readText(restore_information.source_path, value_buffer); - else if (key == "detached") - readBoolTextWord(restore_information.detached, value_buffer); - else - throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - } - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, "Failed to read restore information"); - throw; - } -} - -static String shrinkKey(const String & path, const String & key) -{ - if (!key.starts_with(path)) - throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); - - return key.substr(path.length()); -} - -static std::tuple extractRevisionAndOperationFromKey(const String & key) -{ - String revision_str; - String operation; - /// Key has format: ../../r{revision}-{operation} - static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; - - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); - - return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; -} - -void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) -{ - LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); - - std::vector> results; - auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) - { - std::vector keys_names; - for (const auto & [key, size] : keys) - { - - LOG_INFO(disk->log, "Calling restore for key for disk {}", key); - - /// Skip file operations objects. They will be processed separately. - if (key.find("/operations/") != String::npos) - continue; - - const auto [revision, _] = extractRevisionAndOperationFromKey(key); - /// Filter early if it's possible to get revision from key. - if (revision > restore_information.revision) - continue; - - keys_names.push_back(key); - } - - if (!keys_names.empty()) - { - auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() - { - processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); - }); - - results.push_back(std::move(result)); - } - - return true; - }; - - BlobsPathToSize children; - source_object_storage->listPrefix(restore_information.source_path, children); - - restore_files(children); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); - -} - -void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const -{ - for (const auto & key : keys) - { - auto meta = source_object_storage->getObjectMetadata(key); - auto object_attributes = meta.attributes; - - String path; - if (object_attributes.has_value()) - { - /// Restore file if object has 'path' in metadata. - auto path_entry = object_attributes->find("path"); - if (path_entry == object_attributes->end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - path = path_entry->second; - } - else - continue; - - - disk->createDirectories(directoryPath(path)); - auto relative_key = shrinkKey(source_path, key); - - /// Copy object if we restore to different bucket / path. 
- if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) - source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); - - auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) - { - metadata.addObject(relative_key, meta.size_bytes); - return true; - }; - - disk->createUpdateAndStoreMetadata(path, false, updater); - - LOG_TRACE(disk->log, "Restored file {}", path); - } - -} - -void DiskObjectStorage::onFreeze(const String & path) -{ - createDirectories(path); - auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); - writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); - revision_file_buf->finalize(); -} - -static String pathToDetached(const String & source_path) -{ - if (source_path.ends_with('/')) - return fs::path(source_path).parent_path().parent_path() / "detached/"; - return fs::path(source_path).parent_path() / "detached/"; -} - -void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) -{ - /// Enable recording file operations if we restore to different bucket / path. - bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; - - std::set renames; - auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) - { - const String rename = "rename"; - const String hardlink = "hardlink"; - - for (const auto & [key, _]: keys) - { - const auto [revision, operation] = extractRevisionAndOperationFromKey(key); - if (revision == UNKNOWN_REVISION) - { - LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); - continue; - } - - /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). - /// We can stop processing if revision of the object is already more than required. - if (revision > restore_information.revision) - return false; - - /// Keep original revision if restore to different bucket / path. - if (send_metadata) - revision_counter = revision - 1; - - auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); - if (operation == rename) - { - auto from_path = object_attributes["from_path"]; - auto to_path = object_attributes["to_path"]; - if (disk->exists(from_path)) - { - disk->moveFile(from_path, to_path, send_metadata); - - LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); - - if (restore_information.detached && disk->isDirectory(to_path)) - { - /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. - if (!from_path.ends_with('/')) - from_path += '/'; - if (!to_path.ends_with('/')) - to_path += '/'; - - /// Always keep latest actual directory path to avoid 'detaching' not existing paths. - auto it = renames.find(from_path); - if (it != renames.end()) - renames.erase(it); - - renames.insert(to_path); - } - } - } - else if (operation == hardlink) - { - auto src_path = object_attributes["src_path"]; - auto dst_path = object_attributes["dst_path"]; - if (disk->exists(src_path)) - { - disk->createDirectories(directoryPath(dst_path)); - disk->createHardLink(src_path, dst_path, send_metadata); - LOG_TRACE(disk->log, "Revision {}. 
Restored hardlink {} -> {}", revision, src_path, dst_path); - } - } - } - - return true; - }; - - BlobsPathToSize children; - source_object_storage->listPrefix(restore_information.source_path + "operations/", children); - restore_file_operations(children); - - if (restore_information.detached) - { - Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; - - for (const auto & path : renames) - { - /// Skip already detached parts. - if (path.find("/detached/") != std::string::npos) - continue; - - /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. - fs::path directory_path(path); - auto directory_name = directory_path.parent_path().filename().string(); - - auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; - if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) - continue; - - auto detached_path = pathToDetached(path); - - LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); - - fs::path from_path = fs::path(path); - fs::path to_path = fs::path(detached_path); - if (path.ends_with('/')) - to_path /= from_path.parent_path().filename(); - else - to_path /= from_path.filename(); - - /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename - if (disk->metadata_disk->exists(to_path)) - disk->metadata_disk->removeRecursive(to_path); - - disk->createDirectories(directoryPath(to_path)); - disk->metadata_disk->moveDirectory(from_path, to_path); - } - } - - LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); -} - } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h similarity index 79% rename from src/Disks/DiskObjectStorage.h rename to src/Disks/ObjectStorages/DiskObjectStorage.h index 7ddd3fa6798..e7cbb04ff99 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -1,7 +1,8 @@ #pragma once #include -#include +#include +#include #include namespace CurrentMetrics @@ -12,8 +13,6 @@ namespace CurrentMetrics namespace DB { -class DiskObjectStorageMetadataHelper; - class DiskObjectStorage : public IDisk { @@ -41,11 +40,11 @@ public: struct Metadata; using MetadataUpdater = std::function; - const String & getName() const final override { return name; } + const String & getName() const override { return name; } - const String & getPath() const final override { return metadata_disk->getPath(); } + const String & getPath() const override { return metadata_disk->getPath(); } - std::vector getRemotePaths(const String & local_path) const final override; + std::vector getRemotePaths(const String & local_path) const override; void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; @@ -277,59 +276,4 @@ private: CurrentMetrics::Increment metric_increment; }; -class DiskObjectStorageMetadataHelper -{ -public: - static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); - static constexpr UInt64 UNKNOWN_REVISION = 0; - - DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) - : disk(disk_) - , read_settings(std::move(read_settings_)) - { - } - - struct RestoreInformation - { - UInt64 revision = LATEST_REVISION; - String source_namespace; - String source_path; - bool detached = false; - }; - - using Futures = std::vector>; - - void createFileOperationObject(const String & operation_name, 
UInt64 revision, const ObjectAttributes & metadata) const; - void findLastRevision(); - - static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); - void saveSchemaVersion(const int & version) const; - void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; - void migrateFileToRestorableSchema(const String & path) const; - void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); - void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; - void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - - std::atomic revision_counter = 0; - inline static const String RESTORE_FILE_NAME = "restore"; - - /// Object contains information about schema version. - inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. - static constexpr int RESTORABLE_SCHEMA_VERSION = 1; - /// Directories with data. - const std::vector data_roots {"data", "store"}; - - DiskObjectStorage * disk; - - ObjectStoragePtr object_storage_from_another_namespace; - - ReadSettings read_settings; -}; - } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp new file mode 100644 index 00000000000..8e680663358 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -0,0 +1,541 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_FORMAT; + extern const int LOGICAL_ERROR; +} + +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + +void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const +{ + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); + buf->write('0'); + buf->finalize(); +} + +void DiskObjectStorageMetadataHelper::findLastRevision() +{ + /// Construct revision number from high to low bits. + String revision; + revision.reserve(64); + for (int bit = 0; bit < 64; ++bit) + { + auto revision_prefix = revision + "1"; + + LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); + + /// Check file or operation with such revision prefix exists. 
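+        /// Worked example (illustrative): revisions become zero-padded 64-character binary
+        /// strings (see revisionToString above), so if the newest revision that still has
+        /// objects is 5 ("...000101"), the probes for the 61 high bits find nothing under
+        /// prefix + "1" and keep "0", the probe for bit 2 hits, bit 1 misses, bit 0 hits,
+        /// and after the 64 iterations the counter is reconstructed as 0b101 == 5.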
+ if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) + || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) + revision += "1"; + else + revision += "0"; + } + revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); + LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); +} + +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) +{ + const std::string path = source_path + SCHEMA_VERSION_OBJECT; + int version = 0; + if (!object_storage->exists(path)) + return version; + + auto buf = object_storage->readObject(path); + readIntText(version, *buf); + + return version; +} + +void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const +{ + auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; + + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); + writeIntText(version, *buf); + buf->finalize(); + +} + +void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const +{ + disk->object_storage->copyObject(key, key, metadata); +} + +void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const +{ + LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); + + auto meta = disk->readMetadata(path); + + for (const auto & [key, _] : meta.remote_fs_objects) + { + ObjectAttributes metadata { + {"path", path} + }; + updateObjectMetadata(disk->remote_fs_root_path + key, metadata); + } +} +void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. + + LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); + + bool dir_contains_only_files = true; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + { + if (disk->isDirectory(it->path())) + { + dir_contains_only_files = false; + break; + } + } + + /// The whole directory can be migrated asynchronously. 
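+    /// (one task per such leaf directory is scheduled on the disk's executor; directories
+    /// that also contain subdirectories fall through to the per-file / recursive branch below)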
+ if (dir_contains_only_files) + { + auto result = disk->getExecutor().execute([this, path] + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + migrateFileToRestorableSchema(it->path()); + }); + + results.push_back(std::move(result)); + } + else + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + if (!disk->isDirectory(it->path())) + { + auto source_path = it->path(); + auto result = disk->getExecutor().execute([this, source_path] + { + migrateFileToRestorableSchema(source_path); + }); + + results.push_back(std::move(result)); + } + else + migrateToRestorableSchemaRecursive(it->path(), results); + } + +} + +void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() +{ + try + { + LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); + + Futures results; + + for (const auto & root : data_roots) + if (disk->exists(root)) + migrateToRestorableSchemaRecursive(root + '/', results); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); + + if (!disk->exists(RESTORE_FILE_NAME)) + { + LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME); + return; + } + + try + { + RestoreInformation information; + information.source_path = disk->remote_fs_root_path; + information.source_namespace = disk->object_storage->getObjectsNamespace(); + + readRestoreInformation(information); + if (information.revision == 0) + information.revision = LATEST_REVISION; + if (!information.source_path.ends_with('/')) + information.source_path += '/'; + + IObjectStorage * source_object_storage = disk->object_storage.get(); + if (information.source_namespace == disk->object_storage->getObjectsNamespace()) + { + /// In this case we need to additionally cleanup S3 from objects with later revision. + /// Will be simply just restore to different path. + if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) + throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); + + /// This case complicates S3 cleanup in case of unsuccessful restore. + if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) + throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + } + else + { + object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); + source_object_storage = object_storage_from_another_namespace.get(); + } + + LOG_INFO(disk->log, "Starting to restore disk {}. 
Revision: {}, Source path: {}", + disk->name, information.revision, information.source_path); + + if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) + throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); + + LOG_INFO(disk->log, "Removing old metadata..."); + + bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; + for (const auto & root : data_roots) + if (disk->exists(root)) + disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + + LOG_INFO(disk->log, "Old metadata removed, restoring new one"); + restoreFiles(source_object_storage, information); + restoreFileOperations(source_object_storage, information); + + disk->metadata_disk->removeFile(RESTORE_FILE_NAME); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + + LOG_INFO(disk->log, "Restore disk {} finished", disk->name); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT +{ + auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); + buffer->next(); + + try + { + std::map properties; + + while (buffer->hasPendingData()) + { + String property; + readText(property, *buffer); + assertChar('\n', *buffer); + + auto pos = property.find('='); + if (pos == std::string::npos || pos == 0 || pos == property.length()) + throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); + + auto key = property.substr(0, pos); + auto value = property.substr(pos + 1); + + auto it = properties.find(key); + if (it != properties.end()) + throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + + properties[key] = value; + } + + for (const auto & [key, value] : properties) + { + ReadBufferFromString value_buffer(value); + + if (key == "revision") + readIntText(restore_information.revision, value_buffer); + else if (key == "source_bucket" || key == "source_namespace") + readText(restore_information.source_namespace, value_buffer); + else if (key == "source_path") + readText(restore_information.source_path, value_buffer); + else if (key == "detached") + readBoolTextWord(restore_information.detached, value_buffer); + else + throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + } + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, "Failed to read restore information"); + throw; + } +} + +static String shrinkKey(const String & path, const String & key) +{ + if (!key.starts_with(path)) + throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); + + return key.substr(path.length()); +} + +static std::tuple extractRevisionAndOperationFromKey(const String & key) +{ + String revision_str; + String operation; + /// Key has format: ../../r{revision}-{operation} + static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + + re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + + return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; +} + +void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) +{ + LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); + + std::vector> results; + auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) + { + std::vector keys_names; + for (const auto & [key, size] : keys) + { + + LOG_INFO(disk->log, "Calling restore for key for disk {}", key); + + /// Skip file operations objects. They will be processed separately. + if (key.find("/operations/") != String::npos) + continue; + + const auto [revision, _] = extractRevisionAndOperationFromKey(key); + /// Filter early if it's possible to get revision from key. + if (revision > restore_information.revision) + continue; + + keys_names.push_back(key); + } + + if (!keys_names.empty()) + { + auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() + { + processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); + }); + + results.push_back(std::move(result)); + } + + return true; + }; + + BlobsPathToSize children; + source_object_storage->listPrefix(restore_information.source_path, children); + + restore_files(children); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); + +} + +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const +{ + for (const auto & key : keys) + { + auto meta = source_object_storage->getObjectMetadata(key); + auto object_attributes = meta.attributes; + + String path; + if (object_attributes.has_value()) + { + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes->find("path"); + if (path_entry == object_attributes->end()) + { + /// Such keys can remain after migration, we can skip them. + LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); + continue; + } + + path = path_entry->second; + } + else + continue; + + + disk->createDirectories(directoryPath(path)); + auto relative_key = shrinkKey(source_path, key); + + /// Copy object if we restore to different bucket / path. 
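+        /// (when restoring in place, i.e. the same namespace and the same path, the blob is
+        /// already where it needs to be, and only the local metadata file is recreated below)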
+ if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) + source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); + + auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) + { + metadata.addObject(relative_key, meta.size_bytes); + return true; + }; + + disk->createUpdateAndStoreMetadata(path, false, updater); + + LOG_TRACE(disk->log, "Restored file {}", path); + } + +} + +void DiskObjectStorage::onFreeze(const String & path) +{ + createDirectories(path); + auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); + writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); + revision_file_buf->finalize(); +} + +static String pathToDetached(const String & source_path) +{ + if (source_path.ends_with('/')) + return fs::path(source_path).parent_path().parent_path() / "detached/"; + return fs::path(source_path).parent_path() / "detached/"; +} + +void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) +{ + /// Enable recording file operations if we restore to different bucket / path. + bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; + + std::set renames; + auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + { + const String rename = "rename"; + const String hardlink = "hardlink"; + + for (const auto & [key, _]: keys) + { + const auto [revision, operation] = extractRevisionAndOperationFromKey(key); + if (revision == UNKNOWN_REVISION) + { + LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); + continue; + } + + /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). + /// We can stop processing if revision of the object is already more than required. + if (revision > restore_information.revision) + return false; + + /// Keep original revision if restore to different bucket / path. + if (send_metadata) + revision_counter = revision - 1; + + auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); + if (operation == rename) + { + auto from_path = object_attributes["from_path"]; + auto to_path = object_attributes["to_path"]; + if (disk->exists(from_path)) + { + disk->moveFile(from_path, to_path, send_metadata); + + LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); + + if (restore_information.detached && disk->isDirectory(to_path)) + { + /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. + if (!from_path.ends_with('/')) + from_path += '/'; + if (!to_path.ends_with('/')) + to_path += '/'; + + /// Always keep latest actual directory path to avoid 'detaching' not existing paths. + auto it = renames.find(from_path); + if (it != renames.end()) + renames.erase(it); + + renames.insert(to_path); + } + } + } + else if (operation == hardlink) + { + auto src_path = object_attributes["src_path"]; + auto dst_path = object_attributes["dst_path"]; + if (disk->exists(src_path)) + { + disk->createDirectories(directoryPath(dst_path)); + disk->createHardLink(src_path, dst_path, send_metadata); + LOG_TRACE(disk->log, "Revision {}. 
Restored hardlink {} -> {}", revision, src_path, dst_path); + } + } + } + + return true; + }; + + BlobsPathToSize children; + source_object_storage->listPrefix(restore_information.source_path + "operations/", children); + restore_file_operations(children); + + if (restore_information.detached) + { + Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; + + for (const auto & path : renames) + { + /// Skip already detached parts. + if (path.find("/detached/") != std::string::npos) + continue; + + /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. + fs::path directory_path(path); + auto directory_name = directory_path.parent_path().filename().string(); + + auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; + if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) + continue; + + auto detached_path = pathToDetached(path); + + LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); + + fs::path from_path = fs::path(path); + fs::path to_path = fs::path(detached_path); + if (path.ends_with('/')) + to_path /= from_path.parent_path().filename(); + else + to_path /= from_path.filename(); + + /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename + if (disk->metadata_disk->exists(to_path)) + disk->metadata_disk->removeRecursive(to_path); + + disk->createDirectories(directoryPath(to_path)); + disk->metadata_disk->moveDirectory(from_path, to_path); + } + } + + LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); +} + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h new file mode 100644 index 00000000000..2d8775030e5 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -0,0 +1,77 @@ +#pragma once + +#include + +namespace DB +{ + +class DiskObjectStorage; + +/// Class implements storage of ObjectStorage metadata inside object storage itself, +/// so it's possible to recover from this remote information in case of local disk loss. +/// +/// This machanism can be enabled with `true` option inside +/// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages +/// doesn't support metadata for blobs. +/// +/// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. +/// FIXME: it's very complex and unreliable, need to implement something better. 
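+///
+/// A restore is requested by placing a plain-text file named "restore" on the metadata disk.
+/// It holds newline-separated key=value pairs that readRestoreInformation() understands; a
+/// minimal sketch, with purely illustrative values, is:
+///
+///     revision=42
+///     source_namespace=my-bucket
+///     source_path=data/clickhouse/
+///     detached=true
+///
+/// Keys that are omitted keep their defaults (latest revision, the disk's own namespace and path).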
+class DiskObjectStorageMetadataHelper +{ +public: + static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); + static constexpr UInt64 UNKNOWN_REVISION = 0; + + DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) + : disk(disk_) + , read_settings(std::move(read_settings_)) + { + } + + struct RestoreInformation + { + UInt64 revision = LATEST_REVISION; + String source_namespace; + String source_path; + bool detached = false; + }; + + using Futures = std::vector>; + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + + void findLastRevision(); + + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); + void saveSchemaVersion(const int & version) const; + void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; + void migrateFileToRestorableSchema(const String & path) const; + void migrateToRestorableSchemaRecursive(const String & path, Futures & results); + void migrateToRestorableSchema(); + + /// Most important method, called on DiskObjectStorage startup + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void readRestoreInformation(RestoreInformation & restore_information); + void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; + void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + + std::atomic revision_counter = 0; + inline static const String RESTORE_FILE_NAME = "restore"; + + /// Object contains information about schema version. + inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; + /// Version with possibility to backup-restore metadata. + static constexpr int RESTORABLE_SCHEMA_VERSION = 1; + /// Directories with data. 
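+    /// ("data" is used by Ordinary databases, "store" by Atomic ones)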
+ const std::vector data_roots {"data", "store"}; + + DiskObjectStorage * disk; + + ObjectStoragePtr object_storage_from_another_namespace; + + ReadSettings read_settings; +}; + +} diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp similarity index 98% rename from src/Disks/HDFSObjectStorage.cpp rename to src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 5a1a70f6a50..82dad1dece0 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -1,9 +1,11 @@ -#include +#include #include #include + #include #include + #include #include #include diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h similarity index 98% rename from src/Disks/HDFSObjectStorage.h rename to src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 3c1bac02ee3..a9a223a3d7e 100644 --- a/src/Disks/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -5,7 +5,7 @@ #if USE_HDFS #include -#include +#include #include #include #include diff --git a/src/Disks/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp similarity index 94% rename from src/Disks/HDFS/registerDiskHDFS.cpp rename to src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index f67f6fbb440..0041da6881d 100644 --- a/src/Disks/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp similarity index 96% rename from src/Disks/IObjectStorage.cpp rename to src/Disks/ObjectStorages/IObjectStorage.cpp index 1997022d05c..3941ad8b6f6 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -20,7 +20,6 @@ ThreadPool & IObjectStorage::getThreadPoolWriter() return writer; } - std::string IObjectStorage::getCacheBasePath() const { return cache ? cache->getBasePath() : ""; diff --git a/src/Disks/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h similarity index 74% rename from src/Disks/IObjectStorage.h rename to src/Disks/ObjectStorages/IObjectStorage.h index e5eb08f145d..ebf3ba54324 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -52,6 +52,9 @@ struct ObjectMetadata using FinalizeCallback = std::function; +/// Base class for all object storages which implement some subset of ordinary filesystem operations. +/// +/// Examples of object storages are S3, Azure Blob Storage, HDFS. class IObjectStorage { public: @@ -59,18 +62,24 @@ public: : cache(std::move(cache_)) {} + /// Path exists or not virtual bool exists(const std::string & path) const = 0; + /// List on prefix, return childs with their sizes. virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0; + /// Get object metadata if supported. 
It should be possible to receive + /// at least the size of the object virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0; + /// Read a single path from object storage, don't use the cache virtual std::unique_ptr readObject( /// NOLINT const std::string & path, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; + /// Read multiple objects with a common prefix, use the cache virtual std::unique_ptr readObjects( /// NOLINT const std::string & common_path_prefix, const BlobsPathToSize & blobs_to_read, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; @@ -87,21 +96,28 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) = 0; - /// Remove file. Throws exception if file doesn't exists or it's a directory. + /// Remove object. Throws exception if object doesn't exist. virtual void removeObject(const std::string & path) = 0; + /// Remove multiple objects. Some object storages can do batch remove in a more + /// optimal way. virtual void removeObjects(const std::vector & paths) = 0; - /// Remove file if it exists. + /// Remove object on path if it exists. virtual void removeObjectIfExists(const std::string & path) = 0; + /// Remove objects on path if they exist. virtual void removeObjectsIfExist(const std::vector & paths) = 0; + /// Copy object with different attributes if required virtual void copyObject( /// NOLINT const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + /// Copy object to another instance of object storage; + /// by default just read the object from the source object storage and write + /// it to the destination through buffers. virtual void copyObjectToAnotherObjectStorage( /// NOLINT const std::string & object_from, const std::string & object_to, @@ -110,6 +126,7 @@ public: virtual ~IObjectStorage() = default; + /// Path to the directory with the objects cache std::string getCacheBasePath() const; static AsynchronousReaderPtr getThreadPoolReader(); @@ -122,10 +139,15 @@ public: void removeFromCache(const std::string & path); + /// Apply new settings, in most cases reinitialize the client and some other stuff virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + /// Sometimes object storages have something similar to a chroot or a namespace, for example + /// buckets in S3. If the object storage doesn't have any namespaces, return an empty string. virtual String getObjectsNamespace() const = 0; + /// FIXME: confusing function required for a very specific case. Create a new instance of object storage + /// in a different namespace.
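+    /// (the restore helper uses it when the backup being restored lives in a different
+    /// bucket/namespace, see DiskObjectStorageMetadataHelper::restore)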
virtual std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; protected: diff --git a/src/Disks/S3/ProxyConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyConfiguration.h diff --git a/src/Disks/S3/ProxyListConfiguration.cpp b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp similarity index 100% rename from src/Disks/S3/ProxyListConfiguration.cpp rename to src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp diff --git a/src/Disks/S3/ProxyListConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyListConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyListConfiguration.h diff --git a/src/Disks/S3/ProxyResolverConfiguration.cpp b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp similarity index 100% rename from src/Disks/S3/ProxyResolverConfiguration.cpp rename to src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp diff --git a/src/Disks/S3/ProxyResolverConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyResolverConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp similarity index 99% rename from src/Disks/S3ObjectStorage.cpp rename to src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 9c33553c6b0..5c8287e5d84 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -1,5 +1,4 @@ -#include - +#include #if USE_AWS_S3 @@ -13,8 +12,7 @@ #include #include #include -#include - +#include #include #include diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h similarity index 99% rename from src/Disks/S3ObjectStorage.h rename to src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 12a8930c596..dc939e5e9dd 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -4,7 +4,7 @@ #if USE_AWS_S3 -#include +#include #include #include #include diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp similarity index 99% rename from src/Disks/S3/diskSettings.cpp rename to src/Disks/ObjectStorages/S3/diskSettings.cpp index 9f170799bb9..d54ef02ec7c 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/Disks/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h similarity index 74% rename from src/Disks/S3/diskSettings.h rename to src/Disks/ObjectStorages/S3/diskSettings.h index d8784d0b5a5..fc5a09ce825 100644 --- a/src/Disks/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -6,12 +6,12 @@ #include #include -#include +#include #include #include -#include -#include -#include +#include +#include +#include #include #include #include diff --git a/src/Disks/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h similarity index 100% rename from src/Disks/S3/parseConfig.h rename to src/Disks/ObjectStorages/S3/parseConfig.h diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp similarity index 91% rename from src/Disks/S3/registerDiskS3.cpp rename to 
src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 5da49be12e4..fe3aa78b0b3 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -4,22 +4,23 @@ #include #include #include -#include "Disks/DiskFactory.h" +#include #if USE_AWS_S3 #include #include -#include +#include +#include #include #include -#include -#include -#include +#include +#include +#include #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d862bd4dd5c..20aa6dbb6c7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include From 0c615798592866f43161455e99ac4db2c2031c98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 23:15:42 +0200 Subject: [PATCH 049/150] More comments --- .../ObjectStorages/DiskObjectStorage.cpp | 195 ---------------- src/Disks/ObjectStorages/DiskObjectStorage.h | 65 +----- .../DiskObjectStorageMetadata.cpp | 208 ++++++++++++++++++ .../DiskObjectStorageMetadata.h | 68 ++++++ .../DiskObjectStorageMetadataHelper.cpp | 1 + .../DiskObjectStorageMetadataHelper.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- 7 files changed, 286 insertions(+), 255 deletions(-) create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadata.h diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index b1a396ffee6..163e2087bed 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -25,12 +25,8 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; - extern const int MEMORY_LIMIT_EXCEEDED; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } static String revisionToString(UInt64 revision) @@ -107,197 +103,6 @@ DiskObjectStorage::DiskObjectStorage( , metadata_helper(std::make_unique(this, ReadSettings{})) {} -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - return result; -} - - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, 
metadata_disk_, metadata_file_path_); - updater(result); - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - metadata_disk_->removeFile(metadata_file_path_); - - return result; - -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) -{ - if (overwrite || !metadata_disk_->exists(metadata_file_path_)) - { - return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); - } - else - { - auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - if (result.read_only) - throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); - return result; - } -} - -void DiskObjectStorage::Metadata::load() -{ - try - { - const ReadSettings read_settings; - auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ - - UInt32 version; - readIntText(version, *buf); - - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT, - "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", - metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); - - assertChar('\n', *buf); - - UInt32 remote_fs_objects_count; - readIntText(remote_fs_objects_count, *buf); - assertChar('\t', *buf); - readIntText(total_size, *buf); - assertChar('\n', *buf); - remote_fs_objects.resize(remote_fs_objects_count); - - for (size_t i = 0; i < remote_fs_objects_count; ++i) - { - String remote_fs_object_path; - size_t remote_fs_object_size; - readIntText(remote_fs_object_size, *buf); - assertChar('\t', *buf); - readEscapedString(remote_fs_object_path, *buf); - if (version == VERSION_ABSOLUTE_PATHS) - { - if (!remote_fs_object_path.starts_with(remote_fs_root_path)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", - remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); - - remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); - } - assertChar('\n', *buf); - remote_fs_objects[i].relative_path = remote_fs_object_path; - remote_fs_objects[i].bytes_size = remote_fs_object_size; - } - - readIntText(ref_count, *buf); - assertChar('\n', *buf); - - if (version >= VERSION_READ_ONLY_FLAG) - { - readBoolText(read_only, *buf); - assertChar('\n', *buf); - } - } - catch (Exception & e) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - - if (e.code() == ErrorCodes::UNKNOWN_FORMAT) - throw; - - if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) - throw; - - throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); - } -} - -/// Load metadata by path or create empty if `create` flag is set. 
-DiskObjectStorage::Metadata::Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_) - : remote_fs_root_path(remote_fs_root_path_) - , metadata_file_path(metadata_file_path_) - , metadata_disk(metadata_disk_) - , total_size(0), ref_count(0) -{ -} - -void DiskObjectStorage::Metadata::addObject(const String & path, size_t size) -{ - total_size += size; - remote_fs_objects.emplace_back(path, size); -} - - -void DiskObjectStorage::Metadata::saveToBuffer(WriteBuffer & buf, bool sync) -{ - writeIntText(VERSION_RELATIVE_PATHS, buf); - writeChar('\n', buf); - - writeIntText(remote_fs_objects.size(), buf); - writeChar('\t', buf); - writeIntText(total_size, buf); - writeChar('\n', buf); - - for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) - { - writeIntText(remote_fs_object_size, buf); - writeChar('\t', buf); - writeEscapedString(remote_fs_object_path, buf); - writeChar('\n', buf); - } - - writeIntText(ref_count, buf); - writeChar('\n', buf); - - writeBoolText(read_only, buf); - writeChar('\n', buf); - - buf.finalize(); - if (sync) - buf.sync(); - -} - -/// Fsync metadata file if 'sync' flag is set. -void DiskObjectStorage::Metadata::save(bool sync) -{ - auto buf = metadata_disk->writeFile(metadata_file_path, 1024); - saveToBuffer(*buf, sync); -} - -std::string DiskObjectStorage::Metadata::serializeToString() -{ - WriteBufferFromOwnString write_buf; - saveToBuffer(write_buf, false); - return write_buf.str(); -} - DiskObjectStorage::Metadata DiskObjectStorage::readMetadataUnlocked(const String & path, std::shared_lock &) const { return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index e7cbb04ff99..9a60a7ad25e 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace CurrentMetrics @@ -13,7 +14,11 @@ namespace CurrentMetrics namespace DB { - +/// Disk build on top of IObjectStorage. Use additional disk (local for example) +/// for metadata storage. Metadata is a small files with mapping from local paths to +/// objects in object storage, like: +/// "/var/lib/clickhouse/data/db/table/all_0_0_0/columns.txt" -> /xxxxxxxxxxxxxxxxxxxx +/// -> /yyyyyyyyyyyyyyyyyyyy class DiskObjectStorage : public IDisk { @@ -37,7 +42,7 @@ public: bool supportParallelWrite() const override { return true; } - struct Metadata; + using Metadata = DiskObjectStorageMetadata; using MetadataUpdater = std::function; const String & getName() const override { return name; } @@ -192,62 +197,6 @@ private: std::unique_ptr metadata_helper; }; -struct DiskObjectStorage::Metadata -{ - using Updater = std::function; - /// Metadata file version. - static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; - static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; - static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; - - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; - - /// URI - const String & remote_fs_root_path; - - /// Relative path to metadata file on local FS. - const String metadata_file_path; - - DiskPtr metadata_disk; - - /// Total size of all remote FS (S3, HDFS) objects. - size_t total_size = 0; - - /// Number of references (hardlinks) to this metadata file. - /// - /// FIXME: Why we are tracking it explicetly, without - /// info from filesystem???? 
- UInt32 ref_count = 0; - - /// Flag indicates that file is read only. - bool read_only = false; - - Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_); - - void addObject(const String & path, size_t size); - - static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); - static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - - static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); - static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); - - /// Serialize metadata to string (very same with saveToBuffer) - std::string serializeToString(); - -private: - /// Fsync metadata file if 'sync' flag is set. - void save(bool sync = false); - void saveToBuffer(WriteBuffer & buffer, bool sync); - void load(); -}; - class DiskObjectStorageReservation final : public IReservation { public: diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp new file mode 100644 index 00000000000..2e1ef31f8f0 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -0,0 +1,208 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_FORMAT; + extern const int PATH_ACCESS_DENIED; + extern const int MEMORY_LIMIT_EXCEEDED; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + return result; +} + + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.save(sync); + return result; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + return result; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + updater(result); + result.save(sync); + return result; +} + +DiskObjectStorageMetadata 
DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + metadata_disk_->removeFile(metadata_file_path_); + + return result; + +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) +{ + if (overwrite || !metadata_disk_->exists(metadata_file_path_)) + { + return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); + } + else + { + auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + if (result.read_only) + throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); + return result; + } +} + +void DiskObjectStorageMetadata::load() +{ + try + { + const ReadSettings read_settings; + auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ + + UInt32 version; + readIntText(version, *buf); + + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT, + "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", + metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); + + assertChar('\n', *buf); + + UInt32 remote_fs_objects_count; + readIntText(remote_fs_objects_count, *buf); + assertChar('\t', *buf); + readIntText(total_size, *buf); + assertChar('\n', *buf); + remote_fs_objects.resize(remote_fs_objects_count); + + for (size_t i = 0; i < remote_fs_objects_count; ++i) + { + String remote_fs_object_path; + size_t remote_fs_object_size; + readIntText(remote_fs_object_size, *buf); + assertChar('\t', *buf); + readEscapedString(remote_fs_object_path, *buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!remote_fs_object_path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", + remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); + + remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); + } + assertChar('\n', *buf); + remote_fs_objects[i].relative_path = remote_fs_object_path; + remote_fs_objects[i].bytes_size = remote_fs_object_size; + } + + readIntText(ref_count, *buf); + assertChar('\n', *buf); + + if (version >= VERSION_READ_ONLY_FLAG) + { + readBoolText(read_only, *buf); + assertChar('\n', *buf); + } + } + catch (Exception & e) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + throw; + + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + + throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); + } +} + +/// Load metadata by path or create empty if `create` flag is set. 
+DiskObjectStorageMetadata::DiskObjectStorageMetadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_) + : remote_fs_root_path(remote_fs_root_path_) + , metadata_file_path(metadata_file_path_) + , metadata_disk(metadata_disk_) + , total_size(0), ref_count(0) +{ +} + +void DiskObjectStorageMetadata::addObject(const String & path, size_t size) +{ + total_size += size; + remote_fs_objects.emplace_back(path, size); +} + + +void DiskObjectStorageMetadata::saveToBuffer(WriteBuffer & buf, bool sync) +{ + writeIntText(VERSION_RELATIVE_PATHS, buf); + writeChar('\n', buf); + + writeIntText(remote_fs_objects.size(), buf); + writeChar('\t', buf); + writeIntText(total_size, buf); + writeChar('\n', buf); + + for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) + { + writeIntText(remote_fs_object_size, buf); + writeChar('\t', buf); + writeEscapedString(remote_fs_object_path, buf); + writeChar('\n', buf); + } + + writeIntText(ref_count, buf); + writeChar('\n', buf); + + writeBoolText(read_only, buf); + writeChar('\n', buf); + + buf.finalize(); + if (sync) + buf.sync(); + +} + +/// Fsync metadata file if 'sync' flag is set. +void DiskObjectStorageMetadata::save(bool sync) +{ + auto buf = metadata_disk->writeFile(metadata_file_path, 1024); + saveToBuffer(*buf, sync); +} + +std::string DiskObjectStorageMetadata::serializeToString() +{ + WriteBufferFromOwnString write_buf; + saveToBuffer(write_buf, false); + return write_buf.str(); +} + + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h new file mode 100644 index 00000000000..ba6b7f952fc --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Metadata for DiskObjectStorage, stored on local disk +struct DiskObjectStorageMetadata +{ + using Updater = std::function; + /// Metadata file version. + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; + static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; + + /// Remote FS objects paths and their sizes. + std::vector remote_fs_objects; + + /// URI + const String & remote_fs_root_path; + + /// Relative path to metadata file on local FS. + const String metadata_file_path; + + DiskPtr metadata_disk; + + /// Total size of all remote FS (S3, HDFS) objects. + size_t total_size = 0; + + /// Number of references (hardlinks) to this metadata file. + /// + /// FIXME: Why we are tracking it explicetly, without + /// info from filesystem???? + UInt32 ref_count = 0; + + /// Flag indicates that file is read only. 
+ bool read_only = false; + + DiskObjectStorageMetadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_); + + void addObject(const String & path, size_t size); + + static DiskObjectStorageMetadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); + static DiskObjectStorageMetadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static DiskObjectStorageMetadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + + static DiskObjectStorageMetadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); + static DiskObjectStorageMetadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static DiskObjectStorageMetadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); + + /// Serialize metadata to string (very same with saveToBuffer) + std::string serializeToString(); + +private: + /// Fsync metadata file if 'sync' flag is set. + void save(bool sync = false); + void saveToBuffer(WriteBuffer & buffer, bool sync); + void load(); +}; + +using DiskObjectStorageMetadataUpdater = std::function; + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index 8e680663358..a7e34f7ccd4 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int UNKNOWN_FORMAT; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } static String revisionToString(UInt64 revision) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 2d8775030e5..43c402a4508 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -10,7 +10,7 @@ class DiskObjectStorage; /// Class implements storage of ObjectStorage metadata inside object storage itself, /// so it's possible to recover from this remote information in case of local disk loss. /// -/// This machanism can be enabled with `true` option inside +/// This mechanism can be enabled with `true` option inside /// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages /// doesn't support metadata for blobs. /// diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index ebf3ba54324..b96ddeb2de1 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -65,7 +65,7 @@ public: /// Path exists or not virtual bool exists(const std::string & path) const = 0; - /// List on prefix, return childs with their sizes. + /// List on prefix, return children with their sizes. virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0; /// Get object metadata if supported. 
It should be possible to receive From e481a707aaeebecb9967d2c642191cab16cecf5b Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 23 May 2022 11:51:57 +0300 Subject: [PATCH 050/150] Fixed test --- .../02020_alter_table_modify_comment.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference index a9c37eb2ba5..d2e74fd07f0 100644 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference @@ -124,21 +124,21 @@ CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\n comment= Test table with comment change a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'new comment on a table\' +comment= new comment on a table remove a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192 +comment= add a comment back -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table detach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' re-attach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 
8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table From 216184dfd34c3f7c1c2bda232114c910c578174d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 13:20:32 +0200 Subject: [PATCH 051/150] Rename one more file --- .../AzureBlobStorage/AzureObjectStorage.h | 2 +- .../registerDiskAzureBlobStorage.cpp | 2 +- .../DiskObjectStorageCommon.cpp} | 5 +++-- .../DiskObjectStorageCommon.h} | 10 ++++++--- .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 2 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/diskSettings.h | 2 +- src/Disks/ObjectStorages/S3/parseConfig.h | 17 +++++++-------- .../ObjectStorages/S3/registerDiskS3.cpp | 21 ++++++++++++------- src/IO/WriteBufferFromAzureBlobStorage.cpp | 1 - 10 files changed, 36 insertions(+), 28 deletions(-) rename src/Disks/{RemoteDisksCommon.cpp => ObjectStorages/DiskObjectStorageCommon.cpp} (97%) rename src/Disks/{RemoteDisksCommon.h => ObjectStorages/DiskObjectStorageCommon.h} (85%) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 9012449e284..37c3ba72ed9 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -3,7 +3,7 @@ #if USE_AZURE_BLOB_STORAGE -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index faaec6ee95c..ce8d906d699 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp similarity index 97% rename from src/Disks/RemoteDisksCommon.cpp rename to src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index da6ffed5f11..eb9d7107d39 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -8,7 +8,8 @@ namespace DB { namespace ErrorCodes -{extern const int BAD_ARGUMENTS; +{ + extern const int BAD_ARGUMENTS; } std::shared_ptr wrapWithCache( diff --git a/src/Disks/RemoteDisksCommon.h b/src/Disks/ObjectStorages/DiskObjectStorageCommon.h similarity index 85% rename from src/Disks/RemoteDisksCommon.h rename to src/Disks/ObjectStorages/DiskObjectStorageCommon.h index 661d4e293df..9cc3b971865 100644 --- a/src/Disks/RemoteDisksCommon.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.h @@ -2,18 +2,22 @@ #include #include + #include #include -#include -#include #include +#include +#include namespace DB { std::shared_ptr wrapWithCache( - std::shared_ptr disk, String cache_name, String cache_path, String metadata_path); + std::shared_ptr disk, + String cache_name, + String cache_path, + String metadata_path); std::pair prepareForLocalMetadata( const String & name, diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 0041da6881d..50ed52e5c78 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -1,7 +1,7 @@ #include +#include #include #include -#include #include namespace DB diff --git 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5c8287e5d84..991474f5a98 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index fc5a09ce825..b06e412b6e9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include namespace DB diff --git a/src/Disks/ObjectStorages/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h index 2d14ce9468b..725c86ce462 100644 --- a/src/Disks/ObjectStorages/S3/parseConfig.h +++ b/src/Disks/ObjectStorages/S3/parseConfig.h @@ -6,15 +6,14 @@ #include #include -#include -#include "Disks/DiskCacheWrapper.h" -#include "Storages/StorageS3Settings.h" -#include "ProxyConfiguration.h" -#include "ProxyListConfiguration.h" -#include "ProxyResolverConfiguration.h" -#include "Disks/DiskRestartProxy.h" -#include "Disks/DiskLocal.h" -#include "Disks/RemoteDisksCommon.h" +#include +#include +#include +#include +#include +#include +#include +#include #include diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index fe3aa78b0b3..9c9c76ad451 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -9,19 +9,24 @@ #if USE_AWS_S3 #include -#include -#include -#include + +#include + #include -#include +#include +#include + +#include +#include #include #include #include -#include -#include -#include +#include #include -#include + +#include + +#include namespace DB { diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index 18e03b08817..51d8bf6aba2 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -3,7 +3,6 @@ #if USE_AZURE_BLOB_STORAGE #include -#include #include #include From e76597e5d39d4cc8edbef649551416a1ba8dd6c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 14:46:21 +0200 Subject: [PATCH 052/150] Update src/Core/Settings.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f563260c48e..bdbf9be56fc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -86,7 +86,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ - M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Make sense for debug only.", 0) \ + M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. 
Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ From cef3d96cfe29040f0313aded0197488be13c8dae Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 14:46:25 +0200 Subject: [PATCH 053/150] Update src/Disks/ObjectStorages/S3/S3ObjectStorage.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index dc939e5e9dd..9c6c7664ffb 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -79,7 +79,7 @@ public: const WriteSettings & write_settings = {}) override; void listPrefix(const std::string & path, BlobsPathToSize & children) const override; - /// Remove file. Throws exception if file doesn't exists or it's a directory. + /// Remove file. Throws exception if file doesn't exist or it's a directory. void removeObject(const std::string & path) override; void removeObjects(const std::vector & paths) override; From 37b66c8a9e70cab6b04ab87d4366ee0d42858a7f Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 12:48:48 +0000 Subject: [PATCH 054/150] Check format name on storage creation --- src/Formats/FormatFactory.cpp | 7 +++++++ src/Formats/FormatFactory.h | 3 +++ src/Storages/HDFS/StorageHDFS.cpp | 1 + src/Storages/StorageFile.cpp | 2 ++ src/Storages/StorageS3.cpp | 1 + src/Storages/StorageURL.cpp | 1 + .../02311_create_table_with_unknown_format.reference | 0 .../0_stateless/02311_create_table_with_unknown_format.sql | 4 ++++ 8 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/02311_create_table_with_unknown_format.reference create mode 100644 tests/queries/0_stateless/02311_create_table_with_unknown_format.sql diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4c1b23a75ab..33fd68e67f7 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -585,6 +585,13 @@ bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name) return checkIfFormatHasSchemaReader(name) || checkIfFormatHasExternalSchemaReader(name); } +void FormatFactory::checkFormatName(const String & name) const +{ + auto it = dict.find(name); + if (it == dict.end()) + throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT); +} + FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index f7d3c23d3b4..0431d1ef8c9 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -210,6 +210,9 @@ public: bool isInputFormat(const String & name) const; bool isOutputFormat(const String & name) const; + /// Check that format with specified name exists and throw an exception otherwise. 
+ void checkFormatName(const String & name) const; + private: FormatsDictionary dict; FileExtensionFormats file_extension_formats; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index d114bb67016..a90db60e341 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -146,6 +146,7 @@ StorageHDFS::StorageHDFS( , distributed_processing(distributed_processing_) , partition_by(partition_by_) { + FormatFactory::instance().checkFormatName(format_name); context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 47e32337dfe..a3a1d061ecc 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -382,6 +382,8 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.getContext()->getPath()) { + if (format_name != "Distributed") + FormatFactory::instance().checkFormatName(format_name); } void StorageFile::setStorageMetadata(CommonArguments args) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d402dce5ede..d7e3d5e5374 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -609,6 +609,7 @@ StorageS3::StorageS3( , partition_by(partition_by_) , is_key_with_globs(uri_.key.find_first_of("*?{") != std::string::npos) { + FormatFactory::instance().checkFormatName(format_name); context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri); StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0db4fa75aba..1961711785d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -74,6 +74,7 @@ IStorageURLBase::IStorageURLBase( , http_method(http_method_) , partition_by(partition_by_) { + FormatFactory::instance().checkFormatName(format_name); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.reference b/tests/queries/0_stateless/02311_create_table_with_unknown_format.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql new file mode 100644 index 00000000000..5f43ecd1c65 --- /dev/null +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -0,0 +1,4 @@ +create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=S3('http://host:2020/test/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} +create table test_02311 (x UInt32) engine=HDFS('http://hdfs:9000/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From 1b17086266ed6cf146b0103f66ef9d4417cc5e72 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:01:44 +0200 Subject: [PATCH 055/150] Update src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 43c402a4508..3c638d94d3d 100644 
--- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -11,7 +11,7 @@ class DiskObjectStorage; /// so it's possible to recover from this remote information in case of local disk loss. /// /// This mechanism can be enabled with `true` option inside -/// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages +/// disk configuration. Implemented only for S3 and Azure Blob storage. Other object storages /// doesn't support metadata for blobs. /// /// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. From 1a78ea75bb9f113aa61b834b38afae8e0f7cb369 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:01:51 +0200 Subject: [PATCH 056/150] Update src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 3c638d94d3d..89153e4a39c 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -12,7 +12,7 @@ class DiskObjectStorage; /// /// This mechanism can be enabled with `true` option inside /// disk configuration. Implemented only for S3 and Azure Blob storage. Other object storages -/// doesn't support metadata for blobs. +/// don't support metadata for blobs. /// /// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. /// FIXME: it's very complex and unreliable, need to implement something better. 
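The on-disk layout handled by the new DiskObjectStorageMetadata::saveToBuffer()/load() pair introduced above is compact: the first line holds the format version, the second holds the object count and the total size separated by a tab, then one "size<TAB>relative path" line per remote object, followed by the hardlink reference count and the read-only flag. The sketch below only approximates that layout under simplified assumptions: it uses plain iostreams instead of ClickHouse's WriteBuffer/ReadBuffer, skips the writeEscapedString path escaping, and the MetadataSketch struct and the columns.txt.meta file name are invented for illustration, not part of the patches.

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

struct ObjectRef
{
    std::string relative_path;
    uint64_t bytes_size = 0;
};

struct MetadataSketch
{
    static constexpr uint32_t VERSION_RELATIVE_PATHS = 2;  // same value the real saveToBuffer() writes first

    std::vector<ObjectRef> objects;   // remote_fs_objects in the real struct
    uint64_t total_size = 0;
    uint32_t ref_count = 0;
    bool read_only = false;

    void save(const std::string & file) const
    {
        std::ofstream out(file);
        out << VERSION_RELATIVE_PATHS << '\n';
        out << objects.size() << '\t' << total_size << '\n';
        for (const auto & object : objects)
            out << object.bytes_size << '\t' << object.relative_path << '\n';  // real code escapes the path
        out << ref_count << '\n';
        out << (read_only ? 1 : 0) << '\n';
    }

    void load(const std::string & file)
    {
        std::ifstream in(file);
        uint32_t version = 0;
        uint64_t count = 0;
        in >> version >> count >> total_size;
        objects.resize(count);
        for (auto & object : objects)
            in >> object.bytes_size >> object.relative_path;  // assumes paths without whitespace
        in >> ref_count;
        int read_only_flag = 0;
        in >> read_only_flag;  // real load() reads this only for version >= VERSION_READ_ONLY_FLAG
        read_only = (read_only_flag != 0);
    }
};

int main()
{
    MetadataSketch metadata;
    metadata.objects = {{"xxxxxxxxxxxxxxxxxxxx", 100}, {"yyyyyyyyyyyyyyyyyyyy", 200}};
    metadata.total_size = 300;
    metadata.ref_count = 1;
    metadata.save("columns.txt.meta");  // hypothetical metadata file name, for illustration only

    MetadataSketch loaded;
    loaded.load("columns.txt.meta");
    std::cout << loaded.objects.size() << " objects, " << loaded.total_size << " bytes total\n";
    return 0;
}

In the real classes the same layout is produced by saveToBuffer() and consumed by load(), with save(sync) optionally fsyncing the metadata file when the 'sync' flag is set.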
From 2064934e5928b138564dbeffe14ba476f483150a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:02:29 +0200 Subject: [PATCH 057/150] Update src/Disks/ObjectStorages/DiskObjectStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 163e2087bed..c235e1a864a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -423,8 +423,9 @@ void DiskObjectStorage::removeMetadata(const String & path, std::vector { for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) { - paths_to_remove.push_back(fs::path(remote_fs_root_path) / remote_fs_object_path); - object_storage->removeFromCache(fs::path(remote_fs_root_path) / remote_fs_object_path); + String object_path = fs::path(remote_fs_root_path) / remote_fs_object_path; + paths_to_remove.push_back(object_path); + object_storage->removeFromCache(object_path); } return false; From 1e1e6d4fa0a6ee55f765903d0d0261a131d75bee Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:24:06 +0200 Subject: [PATCH 058/150] Review fixes --- .../registerDiskAzureBlobStorage.cpp | 6 ++--- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 - .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 5 ++-- src/Disks/ObjectStorages/IObjectStorage.h | 4 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 23 +++++++++++++++---- src/IO/ReadBufferFromAzureBlobStorage.h | 4 ++-- 6 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index ce8d906d699..92ba6e426b3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -72,15 +72,13 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - + /// FIXME Cache currently unsupported :( ObjectStoragePtr azure_object_storage = std::make_unique( - std::move(cache), + nullptr, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context)); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 82dad1dece0..4574b8cb52c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -60,7 +60,6 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } - /// Open the file for write and return WriteBufferFromFileBase object. 
std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT const std::string & path, WriteMode mode, diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 50ed52e5c78..04862e43c65 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -31,9 +31,8 @@ void registerDiskHDFS(DiskFactory & factory) config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), context_->getSettingsRef().hdfs_replication ); - FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context_); - - ObjectStoragePtr hdfs_storage = std::make_unique(std::move(cache), uri, std::move(settings), config); + /// FIXME Cache currently unsupported :( + ObjectStoragePtr hdfs_storage = std::make_unique(nullptr, uri, std::move(settings), config); auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index b96ddeb2de1..b9ac497f54f 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -72,14 +72,14 @@ public: /// at least size of object virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0; - /// Read single path from object storage, don't use cache + /// Read single path from object storage virtual std::unique_ptr readObject( /// NOLINT const std::string & path, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; - /// Read multiple objects with common prefix, use cache + /// Read multiple objects with common prefix virtual std::unique_ptr readObjects( /// NOLINT const std::string & common_path_prefix, const BlobsPathToSize & blobs_to_read, diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 991474f5a98..8988a456f52 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -139,7 +139,16 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::optional) const { auto settings_ptr = s3_settings.get(); - return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + + disk_read_settings.remote_fs_cache = cache; + } + + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); } @@ -190,12 +199,13 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi auto result = outcome.GetResult(); auto objects = result.GetContents(); - for (const auto & object : objects) - children.emplace_back(object.GetKey(), object.GetSize()); if (objects.empty()) break; + for (const auto & object : objects) + children.emplace_back(object.GetKey(), object.GetSize()); + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); } while (outcome.GetResult().GetIsTruncated()); } @@ -249,7 +259,8 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = 
client_ptr->DeleteObjects(request); - logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); + if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } } @@ -265,7 +276,9 @@ void S3ObjectStorage::removeObjectIfExists(const std::string & path) Aws::S3::Model::DeleteObjectsRequest request; request.SetBucket(bucket); request.SetDelete(delkeys); - client_ptr->DeleteObjects(request); + auto outcome = client_ptr->DeleteObjects(request); + if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index b7459ccead1..e5a39b84d45 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -24,10 +24,10 @@ public: size_t max_single_download_retries_, size_t tmp_buffer_size_, bool use_external_buffer_ = false, - size_t read_until_position_ = 0 - ); + size_t read_until_position_ = 0); off_t seek(off_t off, int whence) override; + off_t getPosition() override; bool nextImpl() override; From b73d49158d73245e8ee190dfefad196b017f9bcb Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 23 May 2022 17:01:45 +0200 Subject: [PATCH 059/150] Fix test --- .../0_stateless/02311_create_table_with_unknown_format.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql index 5f43ecd1c65..d046ffebca2 100644 --- a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=S3('http://host:2020/test/data', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From 164f8227f92fbdd0d0ae1f2fb05d24e628d9d29b Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 May 2022 12:38:11 +0200 Subject: [PATCH 060/150] Fix memory sanitizer --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 8988a456f52..78c0fa0ae3c 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -259,8 +259,7 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throwIfError(outcome); + throwIfError(outcome); } } @@ -277,7 +276,7 @@ void S3ObjectStorage::removeObjectIfExists(const std::string & path) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) throwIfError(outcome); } @@ -314,7 +313,8 @@ void 
S3ObjectStorage::removeObjectsIfExist(const std::vector & path request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); + if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } } From 2f37ad7fb8502c2ad21cebff8a7ee22497528efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 24 May 2022 00:12:00 +0200 Subject: [PATCH 061/150] Improve comment --- src/Core/SettingsFields.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 7b820401468..d35865d35c6 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -255,9 +255,13 @@ void SettingFieldMilliseconds::parseFromString(const String & str) template void SettingFieldTimespan::writeBinary(WriteBuffer & out) const { - /// Note that this is unchanged and returns UInt64 for both seconds and milliseconds for - /// compatibility reasons as it's only used by the clients or servers older than - /// DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS + /// Note that this returns an UInt64 (for both seconds and milliseconds units) for compatibility reasons as the value + /// for seconds used to be a integer (now a Float64) + /// This method is only used to communicate with clients or servers older than DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS + /// in which the value was passed as binary (as a UInt64) + /// Later versions pass the setting values as String (using toString() and parseFromString()) and there passing "1.2" will + /// lead to `1` on releases with integer seconds or `1.2` on more recent releases + /// See https://github.com/ClickHouse/ClickHouse/issues/36940 for more details auto num_units = operator UInt64(); writeVarUInt(num_units, out); } From cd4020fa2ff6ed0c283accbe9437be2a581d6b5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Sat, 14 May 2022 12:31:52 +0200 Subject: [PATCH 062/150] Reduce flakiness --- .../02294_floating_point_second_in_settings.reference | 2 +- .../0_stateless/02294_floating_point_second_in_settings.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference b/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference index f6216e2486a..c12d45c746a 100644 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.reference @@ -5,4 +5,4 @@ maximum: 1.1 HTTP CLIENT maximum: 1.1 TABLE: system.settings -max_execution_time 0.1 1 +max_execution_time 30.5 1 diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh index 78aece76e49..b5bf2deb974 100755 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -38,4 +38,4 @@ check_output "${OUTPUT}" # CHECK system.settings echo "TABLE: system.settings" -echo "SELECT name, value, changed from system.settings where name = 'max_execution_time'" | clickhouse-client --max_execution_time 0.1 +echo "SELECT name, value, changed from system.settings where name = 'max_execution_time'" | clickhouse-client --max_execution_time 30.5 From 
59b4d4a643c24244d28a991a93346c9f8fd330bf Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 24 May 2022 21:08:30 +0300 Subject: [PATCH 063/150] ALTER COMMENT is now local-only operation and immediately observable --- .../ReplicatedMergeTreeTableMetadata.cpp | 22 +----------- .../ReplicatedMergeTreeTableMetadata.h | 6 +--- src/Storages/StorageReplicatedMergeTree.cpp | 36 ++++++++++++------- 3 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 393c2eb0dd1..eee046206ba 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -73,7 +73,6 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity_bytes = 0; constraints = metadata_snapshot->getConstraints().toString(); - comment = metadata_snapshot->comment; } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -109,9 +108,6 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!constraints.empty()) out << "constraints: " << constraints << "\n"; - - if (!comment.empty()) - out << "comment: " << quote << comment << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -159,18 +155,8 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) else index_granularity_bytes = 0; - String verb; - readStringUntilWhitespace(verb, in); - - if (verb == "constraints:") - { + if (checkString("constraints: ", in)) in >> " " >> constraints >> "\n"; - - readStringUntilWhitespace(verb, in); - } - - if (verb == "comment:") - in >> " " >> quote >> comment >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -364,12 +350,6 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl diff.new_constraints = from_zk.constraints; } - if (comment != from_zk.comment) - { - diff.comment_changed = true; - diff.comment = from_zk.comment; - } - return diff; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 246cf863d13..6d510d20304 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -30,7 +30,6 @@ struct ReplicatedMergeTreeTableMetadata String projections; String constraints; String ttl_table; - String comment; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -62,13 +61,10 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; - bool comment_changed = false; - String comment; - bool empty() const { return !sorting_key_changed && !sampling_expression_changed && !skip_indices_changed && !projections_changed - && !ttl_table_changed && !constraints_changed && !comment_changed; + && !ttl_table_changed && !constraints_changed; } }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a72866d1dde..7fc9e6acf59 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1152,8 +1152,6 @@ void StorageReplicatedMergeTree::setTableStructure( } } - if (metadata_diff.comment_changed) - new_metadata.comment = metadata_diff.comment; } /// Changes in columns may affect following metadata fields @@ -1201,6 +1199,7 @@ void 
StorageReplicatedMergeTree::setTableStructure( auto table_id = getStorageID(); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(getContext(), table_id, new_metadata); + setInMemoryMetadata(new_metadata); } @@ -4717,9 +4716,9 @@ void StorageReplicatedMergeTree::alter( auto table_id = getStorageID(); - if (commands.isSettingsAlter()) + if (commands.isSettingsAlter() || commands.isCommentAlter()) { - /// We don't replicate storage_settings_ptr ALTER. It's local operation. + /// We don't replicate storage_settings_ptr or table comment ALTER. Those are local operations. /// Also we don't upgrade alter lock to table structure lock. StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); commands.apply(future_metadata, query_context); @@ -4729,6 +4728,7 @@ void StorageReplicatedMergeTree::alter( changeSettings(future_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); + setInMemoryMetadata(future_metadata); return; } @@ -4779,11 +4779,6 @@ void StorageReplicatedMergeTree::alter( future_metadata_in_zk.ttl_table = ""; } - if (future_metadata.comment != current_metadata->comment) - { - future_metadata_in_zk.comment = future_metadata.comment; - } - String new_indices_str = future_metadata.secondary_indices.toString(); if (new_indices_str != current_metadata->secondary_indices.toString()) future_metadata_in_zk.skip_indices = new_indices_str; @@ -4806,13 +4801,26 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "columns", new_columns_str, -1)); - if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) + // Local-only operations. 
+ const bool settings_changed = ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes); + const bool comment_changed = current_metadata->comment != future_metadata.comment; + + if (settings_changed || comment_changed) { - /// Just change settings StorageInMemoryMetadata metadata_copy = *current_metadata; - metadata_copy.settings_changes = future_metadata.settings_changes; - changeSettings(metadata_copy.settings_changes, table_lock_holder); + + if (comment_changed) + metadata_copy.comment = future_metadata.comment; + + if (settings_changed) + { + /// Just change settings + metadata_copy.settings_changes = future_metadata.settings_changes; + changeSettings(metadata_copy.settings_changes, table_lock_holder); + } + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata_copy); + setInMemoryMetadata(metadata_copy); } /// We can be sure, that in case of successful commit in zookeeper our @@ -4870,7 +4878,9 @@ void StorageReplicatedMergeTree::alter( String metadata_zk_path = fs::path(txn->getDatabaseZooKeeperPath()) / "metadata" / escapeForFileName(table_id.table_name); auto ast = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, query_context); applyMetadataChangesToCreateQuery(ast, future_metadata); + setInMemoryMetadata(future_metadata); ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, getObjectDefinitionFromCreateQuery(ast), -1)); + } Coordination::Responses results; From e3f76cab55d55330b2208545656e0780a19834f5 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 16:54:28 +0800 Subject: [PATCH 064/150] impl improve remote fs cache --- src/Common/FileCache.cpp | 54 +++++++++++++++++++++++++++----- src/Common/FileCache.h | 10 ++++++ src/Common/FileCacheSettings.cpp | 1 + src/Common/FileCacheSettings.h | 2 ++ src/Common/FileCache_fwd.h | 1 + src/Common/FileSegment.cpp | 9 ++++++ 6 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index ae1b1afdd09..4a7cea9f004 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -72,6 +72,8 @@ void IFileCache::assertInitialized() const LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_) : IFileCache(cache_base_path_, cache_settings_) + , max_stash_element_size(cache_settings_.max_elements) + , enable_cache_hits_threshold(cache_settings_.enable_cache_hits_threshold) , log(&Poco::Logger::get("LRUFileCache")) { } @@ -404,9 +406,46 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( "Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}", keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock)); - auto file_segment = std::make_shared(offset, size, key, this, state); - FileSegmentCell cell(std::move(file_segment), this, cache_lock); + auto skip_or_download = [&]() -> FileSegmentPtr + { + if (state == FileSegment::State::EMPTY) + { + LOG_TEST(log, "[addCell] FileSegment key:{}, offset:{}, state:{}, enable_cache_hits:{}, current_element_size:{}/{}.", + keyToStr(key), offset, FileSegment::stateToString(state), enable_cache_hits_threshold, stash_queue.getElementsNum(cache_lock), max_stash_element_size); + + auto record = records.find({key, offset}); + if (record == records.end()) + { + auto queue_iter = stash_queue.add(key, offset, 0, cache_lock); + records.insert({{key, offset}, queue_iter}); + + if 
(stash_queue.getElementsNum(cache_lock) > max_stash_element_size) + { + auto remove_queue_iter = stash_queue.begin(); + records.erase({remove_queue_iter->key, remove_queue_iter->offset}); + stash_queue.remove(remove_queue_iter, cache_lock); + } + /// For segments that do not reach the download threshold, we do not download them, but directly read them + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + } + else + { + auto queue_iter = record->second; + queue_iter->hits++; + stash_queue.moveToEnd(queue_iter, cache_lock); + + if (queue_iter->hits >= enable_cache_hits_threshold) + return std::make_shared(offset, size, key, this, FileSegment::State::EMPTY); + else + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + } + } + else + return std::make_shared(offset, size, key, this, state); + }; + + FileSegmentCell cell(skip_or_download(), this, cache_lock); auto & offsets = files[key]; if (offsets.empty()) @@ -471,7 +510,7 @@ bool LRUFileCache::tryReserve( std::vector to_evict; std::vector trash; - for (const auto & [entry_key, entry_offset, entry_size] : queue) + for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) { if (!is_overflow()) break; @@ -619,7 +658,7 @@ void LRUFileCache::remove() std::vector to_remove; for (auto it = queue.begin(); it != queue.end();) { - const auto & [key, offset, size] = *it++; + const auto & [key, offset, size, hits] = *it++; auto * cell = getCell(key, offset, cache_lock); if (!cell) throw Exception( @@ -882,6 +921,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell( queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock); break; } + case FileSegment::State::SKIP_CACHE: case FileSegment::State::EMPTY: case FileSegment::State::DOWNLOADING: { @@ -934,7 +974,7 @@ bool LRUFileCache::LRUQueue::contains( { /// This method is used for assertions in debug mode. /// So we do not care about complexity here. 
- for (const auto & [entry_key, entry_offset, size] : queue) + for (const auto & [entry_key, entry_offset, size, hits] : queue) { if (key == entry_key && offset == entry_offset) return true; @@ -947,7 +987,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g [[maybe_unused]] size_t total_size = 0; for (auto it = queue.begin(); it != queue.end();) { - auto & [key, offset, size] = *it++; + auto & [key, offset, size, hits] = *it++; auto * cell = cache->getCell(key, offset, cache_lock); if (!cell) @@ -969,7 +1009,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g String LRUFileCache::LRUQueue::toString(std::lock_guard & /* cache_lock */) const { String result; - for (const auto & [key, offset, size] : queue) + for (const auto & [key, offset, size, hits] : queue) { if (!result.empty()) result += ", "; diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index ff65b579470..42feb9727b6 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -165,6 +165,7 @@ private: Key key; size_t offset; size_t size; + size_t hits = 0; FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {} }; @@ -223,8 +224,17 @@ private: using FileSegmentsByOffset = std::map; using CachedFiles = std::unordered_map; + using AccessKeyAndOffset = std::pair; + using AccessRecord = std::map; + CachedFiles files; LRUQueue queue; + + LRUQueue stash_queue; + AccessRecord records; + size_t max_stash_element_size; + size_t enable_cache_hits_threshold; + Poco::Logger * log; FileSegments getImpl( diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp index f555de277b2..6982068e40f 100644 --- a/src/Common/FileCacheSettings.cpp +++ b/src/Common/FileCacheSettings.cpp @@ -11,6 +11,7 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); + enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD); } } diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index 0b34e1e3d82..2f508c3ef46 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -14,6 +14,8 @@ struct FileCacheSettings size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; bool cache_on_write_operations = false; + size_t enable_cache_hits_threshold = REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Common/FileCache_fwd.h b/src/Common/FileCache_fwd.h index 7448f0c8c89..8a7c2eeb458 100644 --- a/src/Common/FileCache_fwd.h +++ b/src/Common/FileCache_fwd.h @@ -7,6 +7,7 @@ namespace DB static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; +static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0; class IFileCache; using FileCachePtr 
= std::shared_ptr; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 356ba8bf55f..4b8ce9d4b77 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -59,6 +59,10 @@ FileSegment::FileSegment( downloader_id = getCallerId(); break; } + case (State::SKIP_CACHE): + { + break; + } default: { throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); @@ -525,6 +529,11 @@ void FileSegment::complete(std::lock_guard & cache_lock) void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std::lock_guard & segment_lock) { + bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + + if (is_last_holder && download_state == State::SKIP_CACHE) + cache->remove(key(), offset(), cache_lock, segment_lock); + if (download_state == State::SKIP_CACHE || is_detached) return; From 9518c41dda147023d37c47b220c2650999411bea Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 25 May 2022 09:01:12 +0000 Subject: [PATCH 065/150] Try to fix tests --- .../test_allowed_url_from_config/test.py | 24 +++++++++---------- ...02311_create_table_with_unknown_format.sql | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index 01a2a500ebf..da9d4404c82 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -33,7 +33,7 @@ def start_cluster(): def test_config_with_hosts(start_cluster): assert ( node1.query( - "CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)" + "CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', CSV)" ) == "" ) @@ -44,7 +44,7 @@ def test_config_with_hosts(start_cluster): == "" ) assert "not allowed" in node1.query_and_get_error( - "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)" + "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', CSV)" ) assert "not allowed" in node1.query_and_get_error( "CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)" @@ -60,7 +60,7 @@ def test_config_with_only_primary_hosts(start_cluster): ) assert ( node2.query( - "CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)" + "CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', CSV)" ) == "" ) @@ -72,25 +72,25 @@ def test_config_with_only_primary_hosts(start_cluster): ) assert ( node2.query( - "CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)" + "CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', CSV)" ) == "" ) assert "not allowed" in node2.query_and_get_error( - "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node2.query_and_get_error( "CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)" ) assert "not allowed" in node2.query_and_get_error( - "CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)" + "CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', CSV)" ) def test_config_with_only_regexp_hosts(start_cluster): assert ( node3.query( - "CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)" + "CREATE TABLE table_test_3_1 (word 
String) Engine=URL('https://host:80', CSV)" ) == "" ) @@ -104,7 +104,7 @@ def test_config_with_only_regexp_hosts(start_cluster): "CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node3.query_and_get_error( - "CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)" + "CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', CSV)" ) @@ -123,7 +123,7 @@ def test_config_without_allowed_hosts_section(start_cluster): ) assert ( node4.query( - "CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', CSV)" ) == "" ) @@ -135,7 +135,7 @@ def test_config_without_allowed_hosts_section(start_cluster): ) assert ( node4.query( - "CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)" + "CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', CSV)" ) == "" ) @@ -149,13 +149,13 @@ def test_config_without_allowed_hosts(start_cluster): "CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)" ) assert "not allowed" in node5.query_and_get_error( - "CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)" + "CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', CSV)" ) assert "not allowed" in node5.query_and_get_error( "CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)" ) assert "not allowed" in node5.query_and_get_error( - "CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)" + "CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', CSV)" ) diff --git a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql index d046ffebca2..54e388c3cf0 100644 --- a/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql +++ b/tests/queries/0_stateless/02311_create_table_with_unknown_format.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, use-hdfs, no-backward-compatibility-check:22.5 create table test_02311 (x UInt32) engine=File(UnknownFormat); -- {serverError UNKNOWN_FORMAT} create table test_02311 (x UInt32) engine=URL('http://some/url', UnknownFormat); -- {serverError UNKNOWN_FORMAT} From 1ce219bae24579457aa4081787200709f18ced18 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 17:24:38 +0800 Subject: [PATCH 066/150] fix --- src/Common/FileCache.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 4a7cea9f004..bbd1d491685 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -426,19 +426,19 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( records.erase({remove_queue_iter->key, remove_queue_iter->offset}); stash_queue.remove(remove_queue_iter, cache_lock); } + /// For segments that do not reach the download threshold, we do not download them, but directly read them - return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + state = queue_iter->hits >= enable_cache_hits_threshold ? 
FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; + return std::make_shared(offset, size, key, this, state); } else { auto queue_iter = record->second; queue_iter->hits++; stash_queue.moveToEnd(queue_iter, cache_lock); - - if (queue_iter->hits >= enable_cache_hits_threshold) - return std::make_shared(offset, size, key, this, FileSegment::State::EMPTY); - else - return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + + state = queue_iter->hits >= enable_cache_hits_threshold ? FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; + return std::make_shared(offset, size, key, this, state); } } else From d0fcffec66aa96f0972a0262282d053aa6ffa852 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 17:51:03 +0800 Subject: [PATCH 067/150] fix style --- src/Common/FileCache.cpp | 3 --- src/Common/FileCache.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index bbd1d491685..efbe869db06 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -410,9 +410,6 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( { if (state == FileSegment::State::EMPTY) { - LOG_TEST(log, "[addCell] FileSegment key:{}, offset:{}, state:{}, enable_cache_hits:{}, current_element_size:{}/{}.", - keyToStr(key), offset, FileSegment::stateToString(state), enable_cache_hits_threshold, stash_queue.getElementsNum(cache_lock), max_stash_element_size); - auto record = records.find({key, offset}); if (record == records.end()) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 42feb9727b6..b25a9d69249 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -234,7 +234,7 @@ private: AccessRecord records; size_t max_stash_element_size; size_t enable_cache_hits_threshold; - + Poco::Logger * log; FileSegments getImpl( From 2211c1ddb8c4b07f6bacdbd51fc45fa93a9db78e Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 20:15:43 +0800 Subject: [PATCH 068/150] fix --- src/Common/FileCache.h | 12 +++++++++++- src/Common/FileSegment.cpp | 3 +-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index b25a9d69249..a367d47885b 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,16 @@ private: using CachedFiles = std::unordered_map; using AccessKeyAndOffset = std::pair; - using AccessRecord = std::map; + + struct KeyAndOffsetHash + { + std::size_t operator()(const AccessKeyAndOffset & key) const + { + return std::hash()(key.first) ^ std::hash()(key.second); + } + }; + + using AccessRecord = std::unordered_map; CachedFiles files; LRUQueue queue; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 4b8ce9d4b77..aee3d470f44 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -551,8 +551,7 @@ void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the /// downloader or the only owner of the segment. 
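The switch of AccessRecord from std::map to std::unordered_map in FileCache.h is what forces the custom KeyAndOffsetHash: an ordered map only needs operator< on the pair key, while an unordered_map needs an explicit hash functor. A stand-alone sketch of the same idea follows; the types are simplified (the real cache key is not std::string) and the names are illustrative only, not part of the patch.

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <utility>

    // Simplified stand-in for AccessKeyAndOffset: (cache key, offset in file).
    using KeyAndOffset = std::pair<std::string, std::size_t>;

    struct KeyAndOffsetHash
    {
        std::size_t operator()(const KeyAndOffset & p) const
        {
            // Same combination strategy as the patch: XOR of the two element hashes.
            return std::hash<std::string>()(p.first) ^ std::hash<std::size_t>()(p.second);
        }
    };

    int main()
    {
        // std::map needed no hasher; std::unordered_map takes it as the third template argument.
        std::unordered_map<KeyAndOffset, std::size_t, KeyAndOffsetHash> records;
        records[{"some_key", 0}] = 1;
        return static_cast<int>(records.size());
    }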
- bool can_update_segment_state = isDownloaderImpl(segment_lock) - || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + bool can_update_segment_state = isDownloaderImpl(segment_lock) || is_last_holder; if (can_update_segment_state) download_state = State::PARTIALLY_DOWNLOADED; From adbb8211766f586705483a6a2c6fca81e858e36b Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 21:05:15 +0800 Subject: [PATCH 069/150] fix --- src/Common/FileCache.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index efbe869db06..714fa8d737e 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -507,7 +507,7 @@ bool LRUFileCache::tryReserve( std::vector to_evict; std::vector trash; - for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) + for (const auto & [entry_key, entry_offset, entry_size, _] : queue) { if (!is_overflow()) break; @@ -655,7 +655,7 @@ void LRUFileCache::remove() std::vector to_remove; for (auto it = queue.begin(); it != queue.end();) { - const auto & [key, offset, size, hits] = *it++; + const auto & [key, offset, size, _] = *it++; auto * cell = getCell(key, offset, cache_lock); if (!cell) throw Exception( @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _] : queue) + for (const auto & [entry_key, entry_offset, _, _] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( @@ -971,7 +971,7 @@ bool LRUFileCache::LRUQueue::contains( { /// This method is used for assertions in debug mode. /// So we do not care about complexity here. 
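The `[entry_key, entry_offset, _, _]` binding added above in LRUQueue::add is why two follow-up commits keep touching this loop: a structured binding introduces a name for every element, so repeating `_` declares the same identifier twice and does not compile. Hence the later rename to `_, __` and, finally, to the named `entry_size` / `entry_hits`. A minimal illustration of the problem, not taken from the patch:

    #include <tuple>

    int main()
    {
        std::tuple<int, int, int, int> entry{1, 2, 3, 4};

        // error: redefinition of '_' -- every name in a structured binding must be unique
        // auto & [key, offset, _, _] = entry;

        auto & [key, offset, size, hits] = entry;   // distinct names are fine
        return key + offset + size + hits;
    }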
- for (const auto & [entry_key, entry_offset, size, hits] : queue) + for (const auto & [entry_key, entry_offset, size, _] : queue) { if (key == entry_key && offset == entry_offset) return true; @@ -984,7 +984,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g [[maybe_unused]] size_t total_size = 0; for (auto it = queue.begin(); it != queue.end();) { - auto & [key, offset, size, hits] = *it++; + auto & [key, offset, size, _] = *it++; auto * cell = cache->getCell(key, offset, cache_lock); if (!cell) @@ -1006,7 +1006,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g String LRUFileCache::LRUQueue::toString(std::lock_guard & /* cache_lock */) const { String result; - for (const auto & [key, offset, size, hits] : queue) + for (const auto & [key, offset, size, _] : queue) { if (!result.empty()) result += ", "; From 875557abc23ddd4ad8da81d75e5bf1a1662c8faa Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 21:53:28 +0800 Subject: [PATCH 070/150] fix --- src/Common/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 714fa8d737e..c28794ab23e 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _, _] : queue) + for (const auto & [entry_key, entry_offset, _, __] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( From fda6ddeffa608611947d9c0fab263bca9f3a2660 Mon Sep 17 00:00:00 2001 From: msaf1980 Date: Wed, 25 May 2022 19:23:05 +0500 Subject: [PATCH 071/150] cleanup StorageHDFS (unused variables) --- src/Storages/HDFS/StorageHDFS.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index c8ebbfcfaac..1f9e57ab2b7 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -132,8 +132,6 @@ private: Block block_for_format; std::vector requested_virtual_columns; UInt64 max_block_size; - bool need_path_column; - bool need_file_column; std::shared_ptr file_iterator; ColumnsDescription columns_description; From 16e839ac71065c27567ed1cda94c98bea1b69d8c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 25 May 2022 14:54:49 +0000 Subject: [PATCH 072/150] add profile events for introspection of part types --- src/Common/ProfileEvents.cpp | 7 +++ src/Interpreters/PartLog.cpp | 3 ++ src/Interpreters/PartLog.h | 3 ++ .../MergeTree/MergeFromLogEntryTask.cpp | 1 + .../MergeTree/MergePlainMergeTreeTask.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 44 +++++++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 3 +- src/Storages/MergeTree/MergeTreeSink.cpp | 1 + .../MergeTree/ReplicatedMergeTreeSink.cpp | 1 + .../02306_part_types_profile_events.reference | 7 +++ .../02306_part_types_profile_events.sql | 44 +++++++++++++++++++ 12 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02306_part_types_profile_events.reference create mode 100644 tests/queries/0_stateless/02306_part_types_profile_events.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 7f3b9788c1f..9fa47ff959c 100644 --- a/src/Common/ProfileEvents.cpp +++ 
b/src/Common/ProfileEvents.cpp @@ -144,6 +144,13 @@ M(MergeTreeDataWriterBlocks, "Number of blocks INSERTed to MergeTree tables. Each block forms a data part of level zero.") \ M(MergeTreeDataWriterBlocksAlreadySorted, "Number of blocks INSERTed to MergeTree tables that appeared to be already sorted.") \ \ + M(InsertedWideParts, "Number of parts inserted in Wide format.") \ + M(InsertedCompactParts, "Number of parts inserted in Compact format.") \ + M(InsertedInMemoryParts, "Number of parts inserted in InMemory format.") \ + M(MergedIntoWideParts, "Number of parts merged into Wide format.") \ + M(MergedIntoCompactParts, "Number of parts merged into Compact format.") \ + M(MergedIntoInMemoryParts, "Number of parts in merged into InMemory format.") \ + \ M(MergeTreeDataProjectionWriterRows, "Number of rows INSERTed to MergeTree tables projection.") \ M(MergeTreeDataProjectionWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables projection.") \ M(MergeTreeDataProjectionWriterCompressedBytes, "Bytes written to filesystem for data INSERTed to MergeTree tables projection.") \ diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index ce9aa0c03d1..6d57f6b7045 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -46,6 +46,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"part_type", std::make_shared()}, {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, @@ -80,6 +81,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_name); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(part_type.toString()); columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); @@ -159,6 +161,7 @@ bool PartLog::addNewParts( elem.part_name = part->name; elem.disk_name = part->volume->getDisk()->getName(); elem.path_on_disk = part->getFullPath(); + elem.part_type = part->getType(); elem.bytes_compressed_on_disk = part->getBytesOnDisk(); elem.rows = part->rows_count; diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 7582f6fe9e6..470dce09fa0 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -35,6 +36,8 @@ struct PartLogElement String disk_name; String path_on_disk; + MergeTreeDataPartType part_type; + /// Size of the part UInt64 rows = 0; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 4b8860aa51d..66abe32ac25 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -322,6 +322,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); + storage.incrementMergedPartsProfileEvent(part->getType()); return true; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 0146ce4c7b3..c6a719fbc67 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -117,6 +117,7 @@ void MergePlainMergeTreeTask::finish() new_part = merge_task->getFuture().get(); storage.merger_mutator.renameMergedTemporaryPart(new_part, 
future_part->parts, txn, nullptr); write_part_log({}); + storage.incrementMergedPartsProfileEvent(new_part->getType()); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 50811daa4ab..62c11a31f68 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -93,6 +93,12 @@ namespace ProfileEvents extern const Event DelayedInserts; extern const Event DelayedInsertsMilliseconds; extern const Event DuplicatedInsertedBlocks; + extern const Event InsertedWideParts; + extern const Event InsertedCompactParts; + extern const Event InsertedInMemoryParts; + extern const Event MergedIntoWideParts; + extern const Event MergedIntoCompactParts; + extern const Event MergedIntoInMemoryParts; } namespace CurrentMetrics @@ -1716,6 +1722,7 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa part_log_elem.part_name = part->name; part_log_elem.bytes_compressed_on_disk = part->getBytesOnDisk(); part_log_elem.rows = part->rows_count; + part_log_elem.part_type = part->getType(); part_log->add(part_log_elem); } @@ -6190,6 +6197,7 @@ try part_log_elem.path_on_disk = result_part->getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); part_log_elem.rows = result_part->rows_count; + part_log_elem.part_type = result_part->getType(); } part_log_elem.source_part_names.reserve(source_parts.size()); @@ -6755,6 +6763,42 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } +#define FOR_EACH_PART_TYPE(M) \ + M(Wide) \ + M(Compact) \ + M(InMemory) + +#define DECLARE_INCREMENT_EVENT_CASE(Event, Type) \ + case MergeTreeDataPartType::Type: \ + ProfileEvents::increment(ProfileEvents::Event##Type##Parts); \ + break; + +#define DECLARE_INCREMENT_EVENT(value, CASE) \ + switch (value) \ + { \ + FOR_EACH_PART_TYPE(CASE) \ + default: \ + break; \ + } + +void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) +{ + #define DECLARE_INSERTED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(Inserted, Type) + DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_INSERTED_EVENT_CASE) + #undef DECLARE_INSERTED_EVENT +} + +void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) +{ + #define DECLARE_MERGED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(MergedInto, Type) + DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_MERGED_EVENT_CASE) + #undef DECLARE_MERGED_EVENT +} + +#undef FOR_EACH_PART_TYPE +#undef DECLARE_INCREMENT_EVENT_CASE +#undef DECLARE_INCREMENT_EVENT + CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger() { std::lock_guard lock(storage.currently_submerging_emerging_mutex); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index df37cd000e4..1ba09251f6f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1224,6 +1224,8 @@ protected: /// Moves part to specified space, used in ALTER ... MOVE ... 
queries bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space); + static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); + static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); private: /// RAII Wrapper for atomic work with currently moving parts diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index bf247074f57..7e08fb0ccfc 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -451,6 +451,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.streams.emplace_back(std::move(stream)); } } + auto finalizer = out->finalizePartAsync( new_data_part, data_settings->fsync_after_insert, @@ -460,8 +461,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.part = new_data_part; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); - /// out.finish(new_data_part, std::move(written_files), sync_on_insert); - ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->getBytesOnDisk()); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 93b9f356595..4dc4d62c2a2 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -137,6 +137,7 @@ void MergeTreeSink::finishDelayedChunk() if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), &storage.increment, nullptr, storage.getDeduplicationLog(), partition.block_dedup_token)) { PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); + storage.incrementInsertedPartsProfileEvent(part->getType()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. storage.background_operations_assignee.trigger(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 126d34bcc1d..de893d59b05 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -261,6 +261,7 @@ void ReplicatedMergeTreeSink::finishDelayedChunk(zkutil::ZooKeeperPtr & zookeepe /// Set a special error code if the block is duplicate int error = (deduplicate && part->is_duplicate) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns, ExecutionStatus(error)); + storage.incrementInsertedPartsProfileEvent(part->getType()); } catch (...) 
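The FOR_EACH_PART_TYPE / DECLARE_INCREMENT_EVENT_CASE machinery added to MergeTreeData.cpp above is compact but opaque. Hand-expanding it for the inserted-parts case (an approximation of the preprocessor output, shown only for readability and relying on the types already present in the patch) gives roughly:

    void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type)
    {
        switch (type.getValue())
        {
            case MergeTreeDataPartType::Wide:
                ProfileEvents::increment(ProfileEvents::InsertedWideParts);
                break;
            case MergeTreeDataPartType::Compact:
                ProfileEvents::increment(ProfileEvents::InsertedCompactParts);
                break;
            case MergeTreeDataPartType::InMemory:
                ProfileEvents::increment(ProfileEvents::InsertedInMemoryParts);
                break;
            default:
                break;
        }
    }

incrementMergedPartsProfileEvent expands the same way, with the MergedInto*Parts counters declared in ProfileEvents.cpp above.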
{ diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.reference b/tests/queries/0_stateless/02306_part_types_profile_events.reference new file mode 100644 index 00000000000..7b5495f39fe --- /dev/null +++ b/tests/queries/0_stateless/02306_part_types_profile_events.reference @@ -0,0 +1,7 @@ +3 1 2 +2 1 1 +Compact +Compact +Wide +Compact 1 +Wide 1 diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql new file mode 100644 index 00000000000..0ec13bc3827 --- /dev/null +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS t_parts_profile_events; + +CREATE TABLE t_parts_profile_events (a UInt32) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 10, min_bytes_for_wide_part = 0; + +SYSTEM STOP MERGES t_parts_profile_events; + +SET log_comment = '02306_part_types_profile_events'; + +INSERT INTO t_parts_profile_events VALUES (1); +INSERT INTO t_parts_profile_events VALUES (1); + +SYSTEM START MERGES t_parts_profile_events; +OPTIMIZE TABLE t_parts_profile_events FINAL; +SYSTEM STOP MERGES t_parts_profile_events; + +INSERT INTO t_parts_profile_events SELECT number FROM numbers(20); + +SYSTEM START MERGES t_parts_profile_events; +OPTIMIZE TABLE t_parts_profile_events FINAL; +SYSTEM STOP MERGES t_parts_profile_events; + +SYSTEM FLUSH LOGS; + +SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['InsertedCompactParts']) + FROM system.query_log WHERE has(databases, currentDatabase()) + AND log_comment = '02306_part_types_profile_events' + AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; + +SELECT count(), sum(ProfileEvents['MergedIntoWideParts']), sum(ProfileEvents['MergedIntoCompactParts']) + FROM system.query_log WHERE has(databases, currentDatabase()) + AND log_comment = '02306_part_types_profile_events' + AND query ILIKE 'OPTIMIZE TABLE%' AND type = 'QueryFinish'; + +SELECT part_type FROM system.part_log WHERE database = currentDatabase() + AND table = 't_parts_profile_events' AND event_type = 'NewPart' + ORDER BY event_time_microseconds; + +SELECT part_type, count() > 0 FROM system.part_log WHERE database = currentDatabase() + AND table = 't_parts_profile_events' AND event_type = 'MergeParts' + GROUP BY part_type; + +DROP TABLE t_parts_profile_events; From a33c7ce648ea3dc70b4aff08b65bdd2dfa5838d5 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 22:58:47 +0800 Subject: [PATCH 073/150] fix --- src/Common/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index c28794ab23e..c2af2e07099 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _, __] : queue) + for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( From 6692b9c2ed4da7b126814d7911da885f595ff932 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 25 May 2022 12:11:44 -0400 Subject: [PATCH 074/150] showCertificate function implementation --- src/Functions/FunctionShowCertificate.cpp | 12 ++ src/Functions/FunctionShowCertificate.h | 189 ++++++++++++++++++++++ 
src/Functions/registerFunctions.cpp | 3 +- 3 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 src/Functions/FunctionShowCertificate.cpp create mode 100644 src/Functions/FunctionShowCertificate.h diff --git a/src/Functions/FunctionShowCertificate.cpp b/src/Functions/FunctionShowCertificate.cpp new file mode 100644 index 00000000000..e978f77244c --- /dev/null +++ b/src/Functions/FunctionShowCertificate.cpp @@ -0,0 +1,12 @@ +#include "FunctionShowCertificate.h" +#include + +namespace DB +{ + +void registerFunctionShowCertificate(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h new file mode 100644 index 00000000000..ce9b8f58103 --- /dev/null +++ b/src/Functions/FunctionShowCertificate.h @@ -0,0 +1,189 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE_SSL + #include + #include "Poco/Net/SSLManager.h" + #include "Poco/Crypto/X509Certificate.h" +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + +// showCertificate() +class FunctionShowCertificate : public IFunction +{ +public: + static constexpr auto name = "showCertificate"; + + static FunctionPtr create(ContextPtr) + { +#if !defined(USE_SSL) || USE_SSL == 0 + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support is disabled"); +#endif + return std::make_shared(); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override + { + return std::make_shared(std::make_shared(), std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + MutableColumnPtr keys = DataTypeString().createColumn(); + MutableColumnPtr values = DataTypeString().createColumn(); + MutableColumnPtr offsets = DataTypeNumber().createColumn(); + + if (input_rows_count) + { +#if USE_SSL + if (const X509 * cert = SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext())) + { + BIO * b = BIO_new(BIO_s_mem()); + SCOPE_EXIT( + { + BIO_free(b); + }); + + keys->insert("version"); + values->insert(std::to_string(X509_get_version(cert) + 1)); + + { + char buf[1024] = {0}; + const ASN1_INTEGER * sn = cert->cert_info->serialNumber; + BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr); + SCOPE_EXIT( + { + BN_free(bnsn); + }); + if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0) + { + keys->insert("serial_number"); + values->insert(buf); + } + + } + + { + const ASN1_BIT_STRING *sig = nullptr; + const X509_ALGOR *al = nullptr; + char buf[1024] = {0}; + X509_get0_signature(&sig, &al, cert); + if (al) + { + OBJ_obj2txt(buf, sizeof(buf), al->algorithm, 0); + keys->insert("signature_algo"); + values->insert(buf); + } + } + + char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0); + if (issuer) + { + SCOPE_EXIT( + { + OPENSSL_free(issuer); + }); + keys->insert("issuer"); + values->insert(issuer); + } + + { + char buf[1024] = {0}; + if (ASN1_TIME_print(b, X509_get_notBefore(cert)) && BIO_read(b, buf, sizeof(buf)) > 0) + { + keys->insert("not_before"); + values->insert(buf); + } + } + + { + char buf[1024] = {0}; + if 
(ASN1_TIME_print(b, X509_get_notAfter(cert)) && BIO_read(b, buf, sizeof(buf)) > 0) + { + keys->insert("not_after"); + values->insert(buf); + } + } + + char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0); + if (subject) + { + SCOPE_EXIT( + { + OPENSSL_free(subject); + }); + keys->insert("subject"); + values->insert(subject); + } + + if (X509_PUBKEY * pkey = X509_get_X509_PUBKEY(cert)) + { + char buf[1024] = {0}; + ASN1_OBJECT *ppkalg = nullptr; + const unsigned char *pk = nullptr; + int ppklen = 0; + X509_ALGOR *pa = nullptr; + if (X509_PUBKEY_get0_param(&ppkalg, &pk, &ppklen, &pa, pkey) && + i2a_ASN1_OBJECT(b, ppkalg) > 0 && BIO_read(b, buf, sizeof(buf)) > 0) + { + keys->insert("pkey_algo"); + values->insert(buf); + } + } + } + offsets->insert(keys->size()); +#endif + } + + size_t sz = keys->size(); + + if (sz && input_rows_count > 1) + { + keys->reserve(sz * input_rows_count); + values->reserve(sz * input_rows_count); + offsets->reserve(input_rows_count); + } + + for (size_t i = 1; i < input_rows_count; ++i) + { + for (size_t j = 0; j < sz; ++j) + { + keys->insertFrom(*keys, j); + values->insertFrom(*values, j); + } + offsets->insert(keys->size()); + } + + auto nested_column = ColumnArray::create( + ColumnTuple::create(Columns{std::move(keys), std::move(values)}), std::move(offsets)); + + return ColumnMap::create(nested_column); + } +}; + +} diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 0c67bf81d1e..bafaf61c2f5 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -68,7 +68,7 @@ void registerFunctionEncrypt(FunctionFactory & factory); void registerFunctionDecrypt(FunctionFactory & factory); void registerFunctionAESEncryptMysql(FunctionFactory & factory); void registerFunctionAESDecryptMysql(FunctionFactory & factory); - +void registerFunctionShowCertificate(FunctionFactory &); #endif void registerFunctions() @@ -135,6 +135,7 @@ void registerFunctions() registerFunctionDecrypt(factory); registerFunctionAESEncryptMysql(factory); registerFunctionAESDecryptMysql(factory); + registerFunctionShowCertificate(factory); #endif registerFunctionTid(factory); registerFunctionLogTrace(factory); From c7b16065e14b4e7f5c4179bc0262407ae4cbec6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 May 2022 21:47:05 +0200 Subject: [PATCH 075/150] Merge with master --- src/Common/ErrorCodes.cpp | 2 +- .../ObjectStorages/DiskObjectStorage.cpp | 11 +++++ src/Disks/ObjectStorages/DiskObjectStorage.h | 4 ++ .../DiskObjectStorageMetadataHelper.cpp | 41 ++++++++++++++--- .../DiskObjectStorageMetadataHelper.h | 45 ++++++++++++++----- src/Disks/ObjectStorages/S3/diskSettings.cpp | 9 ++-- .../ObjectStorages/S3/registerDiskS3.cpp | 1 + src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/StorageS3.cpp | 6 ++- src/Storages/StorageS3.h | 7 ++- 10 files changed, 103 insertions(+), 25 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ce457cda1f2..973dde10756 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -627,8 +627,8 @@ M(656, MEILISEARCH_EXCEPTION) \ M(657, UNSUPPORTED_MEILISEARCH_TYPE) \ M(658, MEILISEARCH_MISSING_SOME_COLUMNS) \ - M(659, HDFS_ERROR) \ M(659, UNKNOWN_STATUS_OF_TRANSACTION) \ + M(660, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index c235e1a864a..65b1d5a5bdf 100644 --- 
a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -622,6 +622,17 @@ void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfig } } +void DiskObjectStorage::syncRevision(UInt64 revision) +{ + metadata_helper->syncRevision(revision); +} + +UInt64 DiskObjectStorage::getRevision() const +{ + return metadata_helper->getRevision(); +} + + DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { if (i != 0) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 9a60a7ad25e..d89c00a5567 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -172,6 +172,10 @@ public: void restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void onFreeze(const String & path) override; + + void syncRevision(UInt64 revision) override; + + UInt64 getRevision() const override; private: const String name; const String remote_fs_root_path; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index a7e34f7ccd4..b09debf9a43 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -23,7 +23,7 @@ static String revisionToString(UInt64 revision) void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { - const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + operation_log_suffix + "-" + operation_name; auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); buf->write('0'); buf->finalize(); @@ -300,15 +300,45 @@ static String shrinkKey(const String & path, const String & key) static std::tuple extractRevisionAndOperationFromKey(const String & key) { String revision_str; + String suffix; String operation; - /// Key has format: ../../r{revision}-{operation} - static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + /// Key has format: ../../r{revision}(-{hostname})-{operation} + static const re2::RE2 key_regexp{".*/r(\\d+)(-[\\w\\d\\-\\.]+)?-(\\w+)$"}; - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + re2::RE2::FullMatch(key, key_regexp, &revision_str, &suffix, &operation); return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; } +void DiskObjectStorageMetadataHelper::moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata) +{ + if (disk->exists(to_path)) + { + if (send_metadata) + { + auto revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + createFileOperationObject("rename", revision, object_metadata); + } + if (disk->isDirectory(from_path)) + { + for (auto it = disk->iterateDirectory(from_path); it->isValid(); it->next()) + moveRecursiveOrRemove(it->path(), fs::path(to_path) / it->name(), false); + } + else + { + disk->removeFile(from_path); + } + } + else + { + disk->moveFile(from_path, to_path, send_metadata); + } +} + void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); @@ -385,7 +415,6 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc else continue; - disk->createDirectories(directoryPath(path)); auto relative_key = shrinkKey(source_path, key); @@ -457,7 +486,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou auto to_path = object_attributes["to_path"]; if (disk->exists(from_path)) { - disk->moveFile(from_path, to_path, send_metadata); + moveRecursiveOrRemove(from_path, to_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 89153e4a39c..58ef8405a13 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -25,9 +26,37 @@ public: DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) : disk(disk_) , read_settings(std::move(read_settings_)) + , operation_log_suffix("-" + getFQDNOrHostName()) { } + /// Most important method, called on DiskObjectStorage startup + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void syncRevision(UInt64 revision) + { + UInt64 local_revision = revision_counter.load(); + while ((revision > local_revision) && revision_counter.compare_exchange_weak(local_revision, revision)); + } + + UInt64 getRevision() const + { + return revision_counter.load(); + } + + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); + + void migrateToRestorableSchema(); + + void findLastRevision(); + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + + /// Version with possibility to backup-restore metadata. 
+ static constexpr int RESTORABLE_SCHEMA_VERSION = 1; + + std::atomic revision_counter = 0; +private: struct RestoreInformation { UInt64 revision = LATEST_REVISION; @@ -38,32 +67,24 @@ public: using Futures = std::vector>; - void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + /// Move file or files in directory when possible and remove files in other case + /// to restore by S3 operation log with same operations from different replicas + void moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata); - void findLastRevision(); - - static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - /// Most important method, called on DiskObjectStorage startup - void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - std::atomic revision_counter = 0; inline static const String RESTORE_FILE_NAME = "restore"; /// Object contains information about schema version. inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. - static constexpr int RESTORABLE_SCHEMA_VERSION = 1; /// Directories with data. 
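The syncRevision() helper made public above is intended as a lock-free "raise to at least this value" update: keep trying to store the larger revision until either the store succeeds or another thread has already recorded a revision at least as large. A stand-alone sketch of the same pattern, with simplified types and not the patch's exact control flow:

    #include <atomic>
    #include <cstdint>

    // Raise `counter` to at least `value`, never lowering it.
    void raiseToAtLeast(std::atomic<uint64_t> & counter, uint64_t value)
    {
        uint64_t observed = counter.load();
        // On failure compare_exchange_weak reloads `observed`, so the loop
        // re-checks whether our value is still the larger one; on success
        // (or once someone else stored something >= value) the loop ends.
        while (value > observed && !counter.compare_exchange_weak(observed, value))
        {
        }
    }

    int main()
    {
        std::atomic<uint64_t> revision{5};
        raiseToAtLeast(revision, 3);   // no change, 5 is already larger
        raiseToAtLeast(revision, 42);  // raised to 42
        return revision.load() == 42 ? 0 : 1;
    }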
const std::vector data_roots {"data", "store"}; @@ -72,6 +93,8 @@ public: ObjectStoragePtr object_storage_from_another_namespace; ReadSettings read_settings; + + String operation_log_suffix; }; } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 145bb4a3d66..79a7978c53e 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -14,9 +14,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -149,4 +149,5 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura } } ->>>>>> master:src/Disks/S3/registerDiskS3.cpp + +#endif diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 9c9c76ad451..d7e82ef3392 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -10,6 +10,7 @@ #include +#include #include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 78896d74d09..0c834564ec4 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -518,7 +518,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 7960b7dfac0..1dbf7b36f1b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -239,7 +239,11 @@ private: }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr query, const Block & virtual_header, ContextPtr context) + const Aws::S3::S3Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context) : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context)) { } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index ef16982ba58..b246de18bfb 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -36,7 +36,12 @@ public: { public: DisclosedGlobIterator( - const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr query, const Block & virtual_header, ContextPtr context); + const Aws::S3::S3Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context); + String next(); private: From 4410d3d15f2f3c62b5c03fccdd391ba9e40722dc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 May 2022 22:49:54 +0200 Subject: [PATCH 076/150] Better test --- ...emote_filesystem_cache_on_insert.reference | 20 +++++++++++----- ...2241_remote_filesystem_cache_on_insert.sql | 23 ++++++++++++++----- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 783227d5587..9181fce3c90 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -3,6 +3,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test 
(key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state FROM @@ -95,6 +96,7 @@ INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; 21 +SYSTEM START MERGES test OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; 27 @@ -104,12 +106,18 @@ SELECT count() FROM system.filesystem_cache; 28 INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM system.query_log -WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC +SYSTEM FLUSH LOGS; +SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM + system.query_log +WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() +ORDER BY + query_start_time + DESC LIMIT 1; SELECT count() FROM test; 5010500 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 31d4ca99abb..12875045373 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -79,6 +79,8 @@ FROM WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SYSTEM STOP MERGES test; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; @@ -89,6 +91,9 @@ SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; + +SYSTEM START MERGES test; + OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; @@ -98,12 +103,18 @@ SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM system.query_log -WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC +SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM + system.query_log +WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() +ORDER BY + query_start_time + DESC LIMIT 1; + SELECT count() FROM test; SELECT count() FROM test WHERE value LIKE '%010%'; From e1ed2aa3dce2a51040a082611b2115f69b69e938 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 25 May 2022 15:15:11 +0200 Subject: [PATCH 077/150] Add failed builds to the build report --- .github/workflows/backport_branches.yml | 11 +++--- .github/workflows/master.yml | 22 +++++------ .github/workflows/pull_request.yml | 22 +++++------ .github/workflows/release_branches.yml | 11 +++--- tests/ci/build_check.py | 3 +- tests/ci/build_report_check.py | 52 +++++++++++++++++++++---- tests/ci/env_helper.py | 3 +- 7 files changed, 79 insertions(+), 45 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 57474c3d9dd..b93c1b61ffd 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -359,15 +359,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -382,8 +378,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2af54da5e16..c890488ea80 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -970,16 +970,12 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -994,8 +990,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | @@ -1018,15 +1017,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/report_check REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse special build check (actions) + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1041,8 +1036,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01490dff59e..8c5fa59aabc 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1025,15 +1025,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1048,8 +1044,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | @@ -1073,15 +1072,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/report_check REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse special build check (actions) + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1096,8 +1091,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c16a4a6a568..a7a4b62a494 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -442,16 +442,12 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -466,8 +462,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f8397bf3e76..9730ac2cc46 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,7 +9,7 @@ import time from shutil import rmtree from typing import List, Optional, Tuple -from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH +from env_helper import GITHUB_JOB, REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH from s3_helper import S3Helper from pr_info import PRInfo from version_helper import ( @@ -138,6 +138,7 @@ def create_json_artifact( "build_config": build_config, "elapsed_seconds": elapsed, "status": success, + "job_name": GITHUB_JOB, } json_name = "build_urls_" + build_name + ".json" diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 44cc45390a5..b2d54eadd60 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -9,11 +9,11 @@ from typing import Dict, List, Tuple from github import Github from env_helper import ( + GITHUB_REPOSITORY, + GITHUB_RUN_URL, + GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, - GITHUB_REPOSITORY, - GITHUB_SERVER_URL, - GITHUB_RUN_URL, ) from report import create_build_html_report from s3_helper import S3Helper @@ -24,6 +24,9 @@ from ci_config import CI_CONFIG from rerun_helper import RerunHelper +NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH") + + class BuildResult: def __init__( self, @@ -76,6 +79,23 @@ def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: return groups +def get_failed_report( + job_name: str, +) -> Tuple[List[BuildResult], List[List[str]], List[str]]: + message = f"{job_name} failed" + build_result = BuildResult( + compiler="unknown", + build_type="unknown", + sanitizer="unknown", + bundled="unknown", + splitted="unknown", + status=message, + elapsed_seconds=0, + with_coverage=False, + ) + return [build_result], [[""]], [""] + + def process_report( build_report, ) -> Tuple[List[BuildResult], List[List[str]], List[str]]: @@ -117,15 +137,19 @@ def get_build_name_from_file_name(file_name): def main(): logging.basicConfig(level=logging.INFO) - reports_path = REPORTS_PATH temp_path = TEMP_PATH - logging.info("Reports path %s", reports_path) + logging.info("Reports path %s", REPORTS_PATH) if not os.path.exists(temp_path): os.makedirs(temp_path) build_check_name = sys.argv[1] - required_builds = int(sys.argv[2]) if len(sys.argv) > 2 else 0 + needs_data = None + required_builds = 0 + if os.path.exists(NEEDS_DATA_PATH): + with open(NEEDS_DATA_PATH, "rb") as file_handler: + needs_data = json.load(file_handler) + required_builds = len(needs_data) gh = Github(get_best_robot_token()) pr_info = PRInfo() @@ -140,7 +164,7 @@ def main(): # Collect reports from json artifacts builds_report_map = {} - for root, _, files in os.walk(reports_path): + for root, _, files in 
os.walk(REPORTS_PATH): for f in files: if f.startswith("build_urls_") and f.endswith(".json"): logging.info("Found build report json %s", f) @@ -163,12 +187,18 @@ def main(): ] some_builds_are_missing = len(build_reports) < required_builds + missing_build_names = [] if some_builds_are_missing: logging.warning( "Expected to get %s build results, got only %s", required_builds, len(build_reports), ) + missing_build_names = [ + name + for name in needs_data + if not any(rep for rep in build_reports if rep["job_name"] == name) + ] else: logging.info("Got exactly %s builds", len(builds_report_map)) @@ -186,6 +216,14 @@ def main(): build_artifacts.extend(build_artifacts_url) build_logs.extend(build_logs_url) + for failed_job in missing_build_names: + build_result, build_artifacts_url, build_logs_url = get_failed_report( + failed_job + ) + build_results.extend(build_result) + build_artifacts.extend(build_artifacts_url) + build_logs.extend(build_logs_url) + total_groups = len(build_results) logging.info("Totally got %s artifact groups", total_groups) if total_groups == 0: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index c97c6298acc..dd081523db1 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -5,11 +5,12 @@ module_dir = p.abspath(p.dirname(__file__)) git_root = p.abspath(p.join(module_dir, "..", "..")) CI = bool(os.getenv("CI")) -TEMP_PATH = os.getenv("TEMP_PATH", module_dir) +TEMP_PATH = os.getenv("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp"))) CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH", "") +GITHUB_JOB = os.getenv("GITHUB_JOB", "local") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") From 3c1b6609ae76aaba93fbf526b3892cf060698048 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 21:23:35 +0000 Subject: [PATCH 078/150] Add comments and make tests more verbose --- src/Core/NamesAndTypes.h | 3 +- src/Functions/grouping.h | 12 +++ .../02293_grouping_function.reference | 83 ++++++++++++++++ .../0_stateless/02293_grouping_function.sql | 1 + ...02293_grouping_function_group_by.reference | 97 +++++++++++++++++++ .../02293_grouping_function_group_by.sql | 1 + 6 files changed, 196 insertions(+), 1 deletion(-) diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index c7a51f51816..b9c03aae0ca 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -105,9 +105,10 @@ public: /// Check that column contains in list bool contains(const String & name) const; - /// Try to get column by name, return empty optional if column not found + /// Try to get column by name, returns empty optional if column not found std::optional tryGetByName(const std::string & name) const; + /// Try to get column position by name, returns number of columns if column isn't found size_t getPosByName(const std::string & name) const noexcept; }; diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index 934be18345d..7c8970667da 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -93,6 +93,12 @@ public: return FunctionGroupingBase::executeImpl(arguments, input_rows_count, [this](UInt64 set_index, UInt64 arg_index) { + // For ROLLUP(a, b, c) there will be following grouping set indexes: + // | GROUPING SET | INDEX | + // | (a, b, c) | 0 | + // | (a, b) | 1 | + // | (a) | 2 | + 
// | () | 3 | return arg_index < aggregation_keys_number - set_index; } ); @@ -117,6 +123,12 @@ public: return FunctionGroupingBase::executeImpl(arguments, input_rows_count, [this](UInt64 set_index, UInt64 arg_index) { + // For CUBE(a, b) there will be following grouping set indexes: + // | GROUPING SET | INDEX | + // | (a, b) | 0 | + // | (a) | 1 | + // | (b) | 2 | + // | () | 3 | auto set_mask = (ONE << aggregation_keys_number) - 1 - set_index; return set_mask & (ONE << (aggregation_keys_number - arg_index - 1)); } diff --git a/tests/queries/0_stateless/02293_grouping_function.reference b/tests/queries/0_stateless/02293_grouping_function.reference index dbae7a11f2e..e71d6812ab5 100644 --- a/tests/queries/0_stateless/02293_grouping_function.reference +++ b/tests/queries/0_stateless/02293_grouping_function.reference @@ -1,3 +1,14 @@ +-- { echoOn } +SELECT + number, + grouping(number, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 1 0 1 0 2 @@ -10,6 +21,16 @@ 7 2 8 2 9 2 +SELECT + number, + grouping(number % 2, number) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 1 0 2 0 2 @@ -22,6 +43,16 @@ 7 1 8 1 9 1 +SELECT + number, + grouping(number, number % 2) = 1 AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, gr; 0 0 0 1 0 1 @@ -34,6 +65,15 @@ 7 0 8 0 9 0 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY number, grouping(number, number % 2) = 1; 0 0 0 @@ -46,6 +86,18 @@ 7 8 9 +SELECT + number, + count(), + grouping(number, number % 2) AS gr +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number, number % 2), + () + ) +ORDER BY (gr, number); 0 10 0 0 1 2 1 1 2 @@ -67,6 +119,17 @@ 7 1 3 8 1 3 9 1 3 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2) = 2 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; 0 1 2 @@ -77,8 +140,28 @@ 7 8 9 +SELECT + number +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +HAVING grouping(number, number % 2) = 1 +ORDER BY number +SETTINGS enable_optimize_predicate_expression = 0; 0 0 +SELECT + number, + GROUPING(number, number % 2) = 1 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2)) +ORDER BY number, gr; 0 0 0 1 0 1 diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index 4bbf620a619..169fc09c324 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -9,6 +9,7 @@ GROUP BY ) ORDER BY number, gr; -- { serverError BAD_ARGUMENTS } +-- { echoOn } SELECT number, grouping(number, number % 2) AS gr diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.reference b/tests/queries/0_stateless/02293_grouping_function_group_by.reference index 021083db6eb..7f87aecd4bd 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.reference +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.reference @@ -1,3 +1,12 @@ +-- { echoOn } +SELECT + number, + grouping(number, number % 2) = 3 +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; 0 1 1 1 2 1 @@ -8,6 +17,15 @@ 7 1 8 1 9 1 +SELECT + number, + grouping(number), + GROUPING(number % 
2) +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 +ORDER BY number; 0 1 1 1 1 1 2 1 1 @@ -18,6 +36,16 @@ 7 1 1 8 1 1 9 1 1 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH ROLLUP +ORDER BY + number, gr; 0 0 0 2 0 3 @@ -39,6 +67,14 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) +ORDER BY + number, gr; 0 0 0 2 0 3 @@ -60,6 +96,16 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + number, + number % 2 + WITH CUBE +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -83,6 +129,14 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) AS gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -106,6 +160,15 @@ 8 3 9 2 9 3 +SELECT + number, + grouping(number, number % 2) + 3 as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) +HAVING grouping(number) != 0 +ORDER BY + number, gr; 0 5 0 6 1 5 @@ -126,6 +189,23 @@ 8 6 9 5 9 6 +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + CUBE(number, number % 2) WITH TOTALS +ORDER BY + number, gr; 0 0 0 1 0 1 @@ -151,6 +231,23 @@ 9 3 0 0 +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +HAVING grouping(number) != 0 +ORDER BY + number, gr; -- { serverError NOT_IMPLEMENTED } +SELECT + number, + grouping(number, number % 2) as gr +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + ROLLUP(number, number % 2) WITH TOTALS +ORDER BY + number, gr; 0 0 0 2 0 3 diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index b30080b88af..9bf9d43478b 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -7,6 +7,7 @@ GROUP BY number % 2 ORDER BY number; -- { serverError BAD_ARGUMENTS } +-- { echoOn } SELECT number, grouping(number, number % 2) = 3 From 7cd7782e4f25b0a2aad9cfa69cd9ffcb4da1e6cc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 21:55:41 +0000 Subject: [PATCH 079/150] Process columns more efficiently in GROUPING() --- src/Functions/grouping.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index 7c8970667da..a49e946b2cb 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -43,18 +43,20 @@ public: template ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const { - auto grouping_set_column = checkAndGetColumn(arguments[0].column.get()); + const auto * grouping_set_column = checkAndGetColumn(arguments[0].column.get()); - auto result = std::make_shared()->createColumn(); + auto result = ColumnUInt64::create(); + auto & result_data = result->getData(); + result_data.reserve(input_rows_count); for (size_t i = 0; i < input_rows_count; 
++i) { - UInt64 set_index = grouping_set_column->get64(i); + UInt64 set_index = grouping_set_column->getElement(i); UInt64 value = 0; for (auto index : arguments_indexes) value = (value << 1) + (checker(set_index, index) ? 1 : 0); - result->insert(Field(value)); + result_data.push_back(value); } return result; } From 8f1aac0ce46cc7818969a27cfc59b2f915523ac1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 00:44:45 +0200 Subject: [PATCH 080/150] Fix merge with master --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 07c46c20adf..1d7fd9a52ce 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -519,7 +519,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); From 16c6b6070344783c9fcbc935fc572da3207b07ec Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 25 May 2022 23:22:29 +0000 Subject: [PATCH 081/150] Introduce AggregationKeysInfo --- src/Interpreters/ActionsVisitor.cpp | 26 ++++++++--------- src/Interpreters/ActionsVisitor.h | 37 ++++++++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 12 ++------ src/Interpreters/ExpressionAnalyzer.h | 5 ++++ src/Storages/VirtualColumnUtils.cpp | 7 ++++- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index d22989219a4..99d2217cba3 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -473,9 +473,7 @@ ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, - const NamesAndTypesList & aggregation_keys_, - const ColumnNumbersList & grouping_set_keys_, + std::reference_wrapper source_columns_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -483,22 +481,20 @@ ActionsMatcher::Data::Data( bool no_makeset_, bool only_consts_, bool create_source_for_in_, - GroupByKind group_by_kind_) + AggregationKeysInfo aggregation_keys_info_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) , source_columns(source_columns_) - , aggregation_keys(aggregation_keys_) - , grouping_set_keys(grouping_set_keys_) , prepared_sets(prepared_sets_) , subqueries_for_sets(subqueries_for_sets_) , no_subqueries(no_subqueries_) , no_makeset(no_makeset_) , only_consts(only_consts_) , create_source_for_in(create_source_for_in_) - , group_by_kind(group_by_kind_) , visit_depth(0) , actions_stack(std::move(actions_dag), context_) + , aggregation_keys_info(aggregation_keys_info_) , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) { } @@ -848,29 +844,31 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument"); if (arguments_size > 64) throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); + auto keys_info = data.aggregation_keys_info; + auto aggregation_keys_number 
= keys_info.aggregation_keys.size(); + ColumnNumbers arguments_indexes; - auto aggregation_keys_number = data.aggregation_keys.size(); for (auto const & arg : node.arguments->children) { - size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName()); + size_t pos = keys_info.aggregation_keys.getPosByName(arg->getColumnName()); if (pos == aggregation_keys_number) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of GROUPING function {} is not a part of GROUP BY clause", arg->getColumnName()); arguments_indexes.push_back(pos); } - switch (data.group_by_kind) + switch (keys_info.group_by_kind) { case GroupByKind::GROUPING_SETS: { - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.grouping_set_keys)), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), keys_info.grouping_set_keys)), { "__grouping_set" }, column_name); break; } case GroupByKind::ROLLUP: - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name); break; case GroupByKind::CUBE: { - data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), data.aggregation_keys.size())), { "__grouping_set" }, column_name); + data.addFunction(std::make_shared(std::make_shared(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name); break; } case GroupByKind::ORDINARY: @@ -880,7 +878,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } default: throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected kind of GROUP BY clause for GROUPING function: {}", data.group_by_kind); + "Unexpected kind of GROUP BY clause for GROUPING function: {}", keys_info.group_by_kind); } return; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 5fd228ba836..5a74124192c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -87,6 +87,33 @@ enum class GroupByKind GROUPING_SETS, }; +/* + * This class stores information about aggregation keys used in GROUP BY clause. + * It's used for providing information about aggregation to GROUPING function + * implementation. +*/ +struct AggregationKeysInfo +{ + AggregationKeysInfo( + std::reference_wrapper aggregation_keys_, + std::reference_wrapper grouping_set_keys_, + GroupByKind group_by_kind_) + : aggregation_keys(aggregation_keys_) + , grouping_set_keys(grouping_set_keys_) + , group_by_kind(group_by_kind_) + {} + + AggregationKeysInfo(const AggregationKeysInfo &) = default; + AggregationKeysInfo(AggregationKeysInfo &&) = default; + + // Names and types of all used keys + const NamesAndTypesList & aggregation_keys; + // Indexes of aggregation keys used in each grouping set (only for GROUP BY GROUPING SETS) + const ColumnNumbersList & grouping_set_keys; + + GroupByKind group_by_kind; +}; + /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too. 
class ActionsMatcher { @@ -98,17 +125,15 @@ public: SizeLimits set_size_limit; size_t subquery_depth; const NamesAndTypesList & source_columns; - const NamesAndTypesList & aggregation_keys; - const ColumnNumbersList & grouping_set_keys; PreparedSets & prepared_sets; SubqueriesForSets & subqueries_for_sets; bool no_subqueries; bool no_makeset; bool only_consts; bool create_source_for_in; - GroupByKind group_by_kind; size_t visit_depth; ScopeStack actions_stack; + AggregationKeysInfo aggregation_keys_info; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -121,9 +146,7 @@ public: ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, - const NamesAndTypesList & aggregation_keys_, - const ColumnNumbersList & grouping_set_keys_, + std::reference_wrapper source_columns_, ActionsDAGPtr actions_dag, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -131,7 +154,7 @@ public: bool no_makeset_, bool only_consts_, bool create_source_for_in_, - GroupByKind group_by_kind_); + AggregationKeysInfo aggregation_keys_info_); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3931180d941..699f73abd67 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -603,8 +603,6 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -612,7 +610,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -626,8 +624,6 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -635,7 +631,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -650,8 +646,6 @@ void ExpressionAnalyzer::getRootActionsForHaving( settings.size_limits_for_set, subquery_depth, sourceColumns(), - aggregation_keys, - aggregation_keys_indexes_list, std::move(actions), prepared_sets, subqueries_for_sets, @@ -659,7 +653,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - group_by_kind); + getAggregationKeysInfo()); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ecac8cba771..971fe753978 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -205,6 +205,11 @@ protected: NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); 
NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); + + AggregationKeysInfo getAggregationKeysInfo() const noexcept + { + return { aggregation_keys, aggregation_keys_indexes_list, group_by_kind }; + } }; class SelectQueryExpressionAnalyzer; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index dd6c30e3c79..40cf650f690 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -156,8 +156,13 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block auto actions = std::make_shared(block.getColumnsWithTypeAndName()); PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; + const NamesAndTypesList source_columns; + const NamesAndTypesList aggregation_keys; + const ColumnNumbersList grouping_set_keys; + ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, {}, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, GroupByKind::NONE); + context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false, + { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); From f488efd27e5d696834bf6749eb4c0d545576eb81 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 26 May 2022 00:03:31 +0000 Subject: [PATCH 082/150] fix tests --- src/Storages/MergeTree/MergeTreeData.cpp | 56 +++++++++---------- .../02306_part_types_profile_events.sql | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 62c11a31f68..6d72c11c3fc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6763,42 +6763,42 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } -#define FOR_EACH_PART_TYPE(M) \ - M(Wide) \ - M(Compact) \ - M(InMemory) - -#define DECLARE_INCREMENT_EVENT_CASE(Event, Type) \ - case MergeTreeDataPartType::Type: \ - ProfileEvents::increment(ProfileEvents::Event##Type##Parts); \ - break; - -#define DECLARE_INCREMENT_EVENT(value, CASE) \ - switch (value) \ - { \ - FOR_EACH_PART_TYPE(CASE) \ - default: \ - break; \ - } - void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) { - #define DECLARE_INSERTED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(Inserted, Type) - DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_INSERTED_EVENT_CASE) - #undef DECLARE_INSERTED_EVENT + switch (type.getValue()) + { + case MergeTreeDataPartType::Wide: + ProfileEvents::increment(ProfileEvents::InsertedWideParts); + break; + case MergeTreeDataPartType::Compact: + ProfileEvents::increment(ProfileEvents::InsertedCompactParts); + break; + case MergeTreeDataPartType::InMemory: + ProfileEvents::increment(ProfileEvents::InsertedInMemoryParts); + break; + default: + break; + } } void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) { - #define DECLARE_MERGED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(MergedInto, Type) - DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_MERGED_EVENT_CASE) - #undef DECLARE_MERGED_EVENT + switch (type.getValue()) + { + case MergeTreeDataPartType::Wide: + 
ProfileEvents::increment(ProfileEvents::MergedIntoWideParts); + break; + case MergeTreeDataPartType::Compact: + ProfileEvents::increment(ProfileEvents::MergedIntoCompactParts); + break; + case MergeTreeDataPartType::InMemory: + ProfileEvents::increment(ProfileEvents::MergedIntoInMemoryParts); + break; + default: + break; + } } -#undef FOR_EACH_PART_TYPE -#undef DECLARE_INCREMENT_EVENT_CASE -#undef DECLARE_INCREMENT_EVENT - CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger() { std::lock_guard lock(storage.currently_submerging_emerging_mutex); diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql index 0ec13bc3827..fd6178941f2 100644 --- a/tests/queries/0_stateless/02306_part_types_profile_events.sql +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -24,7 +24,7 @@ SYSTEM STOP MERGES t_parts_profile_events; SYSTEM FLUSH LOGS; SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['InsertedCompactParts']) - FROM system.query_log WHERE has(databases, currentDatabase()) + FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02306_part_types_profile_events' AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; From 2dc160a4c334cd06aedbbad8ead5b4e7bf910f78 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 25 May 2022 20:56:36 -0400 Subject: [PATCH 083/150] style fix --- src/Functions/FunctionShowCertificate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h index ce9b8f58103..bb3c0fd8299 100644 --- a/src/Functions/FunctionShowCertificate.h +++ b/src/Functions/FunctionShowCertificate.h @@ -111,7 +111,7 @@ public: keys->insert("issuer"); values->insert(issuer); } - + { char buf[1024] = {0}; if (ASN1_TIME_print(b, X509_get_notBefore(cert)) && BIO_read(b, buf, sizeof(buf)) > 0) From abe6b5d0133c00c1f672f4d5bf5ae2acefd18e26 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 26 May 2022 10:09:27 +0300 Subject: [PATCH 084/150] Reverted unnecessary modification --- src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index eee046206ba..7dee7b8d0f8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -156,7 +156,7 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) index_granularity_bytes = 0; if (checkString("constraints: ", in)) - in >> " " >> constraints >> "\n"; + in >> constraints >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) From 822ecd982f42d6e7c63b9ad91dc8cddd1ce62b48 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:36:05 +0800 Subject: [PATCH 085/150] better & support clean stash --- src/Common/FileCache.cpp | 15 ++++++++++++--- src/Common/FileCache.h | 2 ++ src/Common/FileSegment.cpp | 3 +++ src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 5 ++++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index c2af2e07099..6c76bf5c0b3 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -408,7 +408,7 @@ LRUFileCache::FileSegmentCell * 
LRUFileCache::addCell( auto skip_or_download = [&]() -> FileSegmentPtr { - if (state == FileSegment::State::EMPTY) + if (state == FileSegment::State::EMPTY && enable_cache_hits_threshold) { auto record = records.find({key, offset}); @@ -425,8 +425,7 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( } /// For segments that do not reach the download threshold, we do not download them, but directly read them - state = queue_iter->hits >= enable_cache_hits_threshold ? FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; - return std::make_shared(offset, size, key, this, state); + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); } else { @@ -673,6 +672,10 @@ void LRUFileCache::remove() } } } + + /// Remove all access information. + records.clear(); + stash_queue.removeAll(cache_lock); } void LRUFileCache::remove( @@ -955,6 +958,12 @@ void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard & /* cache_lock */) +{ + queue.clear(); + cache_size = 0; +} + void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard & /* cache_lock */) { queue.splice(queue.end(), queue, queue_it); diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index a367d47885b..f66287b805f 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -196,6 +196,8 @@ private: Iterator end() { return queue.end(); } + void removeAll(std::lock_guard & cache_lock); + private: std::list queue; size_t cache_size = 0; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index aee3d470f44..27a111c1297 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -532,7 +532,10 @@ void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); if (is_last_holder && download_state == State::SKIP_CACHE) + { cache->remove(key(), offset(), cache_lock, segment_lock); + return; + } if (download_state == State::SKIP_CACHE || is_detached) return; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 1cb6354d38c..0c0d14c247d 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -491,7 +491,10 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() /// Do not hold pointer to file segment if it is not needed anymore /// so can become releasable and can be evicted from cache. - file_segments_holder->file_segments.erase(file_segment_it); + /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. + /// It will be deleted from the cache when the holder is destructed. 
+ if ((*file_segment_it)->state() != FileSegment::State::SKIP_CACHE) + file_segments_holder->file_segments.erase(file_segment_it); if (current_file_segment_it == file_segments_holder->file_segments.end()) return false; From f5a186c774685c0315b6834fa2e04658a14af909 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:36:26 +0800 Subject: [PATCH 086/150] add test for local cache download --- tests/config/config.d/storage_conf.xml | 20 ++++++++++++++++++- ...0_system_remote_filesystem_cache.reference | 18 +++++++++++++++++ .../02240_system_remote_filesystem_cache.sql | 17 ++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 45fad002c88..90a679e4cdd 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -9,7 +9,6 @@ 1 0 22548578304 - 1 ./s3_cache/ @@ -22,6 +21,18 @@ 22548578304 0 + + s3 + http://localhost:11111/test/00170_test/ + clickhouse + clickhouse + 1 + 0 + 22548578304 + 1 + ./s3_cache/ + 1 + @@ -38,6 +49,13 @@ + + +

+                    <disk>s3_cache_3</disk>
+                </main>
+ + diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index 8bcb7e1dd42..d50f0847104 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -2,6 +2,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; +set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -17,3 +18,20 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy 0 745 746 SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +set enable_cache_hits_threshold=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +0 745 746 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index aa469779130..5e3991a44c0 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -4,6 +4,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; +set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -16,3 +17,19 @@ SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; + +set enable_cache_hits_threshold=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY 
file_segment_range_end, size; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; \ No newline at end of file From ab4614dee5c4fabb29ea1e5c9b6a251e057b3373 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:39:50 +0800 Subject: [PATCH 087/150] fix --- tests/config/config.d/storage_conf.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 90a679e4cdd..2637f80f75f 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -9,6 +9,7 @@ 1 0 22548578304 + 1 ./s3_cache/ From 824628c0dace2a51ea3c7c8d1b6499a36200e6d7 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:51:16 +0800 Subject: [PATCH 088/150] fix style --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 0c0d14c247d..1e63dab165e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -491,7 +491,7 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() /// Do not hold pointer to file segment if it is not needed anymore /// so can become releasable and can be evicted from cache. - /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. + /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. /// It will be deleted from the cache when the holder is destructed. 
if ((*file_segment_it)->state() != FileSegment::State::SKIP_CACHE) file_segments_holder->file_segments.erase(file_segment_it); From 286c13317ffc90f979084b04995c65c9aedc75d9 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 17:38:42 +0800 Subject: [PATCH 089/150] fix stateless test --- .../0_stateless/02240_system_remote_filesystem_cache.reference | 2 -- .../0_stateless/02240_system_remote_filesystem_cache.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index d50f0847104..cfe1ad411cb 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -2,7 +2,6 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; -set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +17,6 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy 0 745 746 SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -set enable_cache_hits_threshold=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index 5e3991a44c0..60a8eba8f3e 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -4,7 +4,6 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; -set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +17,6 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -set enable_cache_hits_threshold=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); From 55379f0d03a2a5ac091a0de037ddb097776fd982 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 20:59:31 +0800 Subject: [PATCH 090/150] fix config --- tests/config/config.d/storage_conf.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 2637f80f75f..b22225163f8 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -31,7 +31,6 @@ 0 22548578304 1 - ./s3_cache/ 1
From 8ae277a1c4a76869616168fb0dc71d06313af311 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 15:25:48 +0200 Subject: [PATCH 091/150] Fix test --- .../02241_remote_filesystem_cache_on_insert.reference | 3 +-- .../0_stateless/02241_remote_filesystem_cache_on_insert.sql | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 9181fce3c90..3627978dfbc 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -96,7 +96,7 @@ INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test +SYSTEM START MERGES test; OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; 27 @@ -106,7 +106,6 @@ SELECT count() FROM system.filesystem_cache; 28 INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read FROM diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 12875045373..8b3f6a9c0b1 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -6,9 +6,8 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; - +SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; - SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( @@ -79,8 +78,6 @@ FROM WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; -SYSTEM STOP MERGES test; - SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; From c862f89b8d384227adf8bc4f4c3ba19b8a52179a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 15:43:21 +0200 Subject: [PATCH 092/150] Fix tidy --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index b09debf9a43..a36c987db7d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -303,7 +303,7 @@ static std::tuple extractRevisionAndOperationFromKey(const Strin String suffix; String operation; /// Key has format: ../../r{revision}(-{hostname})-{operation} - static const re2::RE2 key_regexp{".*/r(\\d+)(-[\\w\\d\\-\\.]+)?-(\\w+)$"}; + static const re2::RE2 key_regexp{R"(.*/r(\d+)(-[\w\d\-\.]+)?-(\w+)$)"}; re2::RE2::FullMatch(key, key_regexp, &revision_str, &suffix, &operation); From 36af6b1fa8486456d5f183b374518310eca48bde Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 26 May 2022 16:15:02 +0200 Subject: [PATCH 093/150] Fix 
assertion --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 33 +------------------ 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 1cb6354d38c..e323d1a4f0e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -212,38 +212,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; return getRemoteFSReadBuffer(file_segment, read_type); } - case FileSegment::State::EMPTY: - { - auto downloader_id = file_segment->getOrSetDownloader(); - if (downloader_id == file_segment->getCallerId()) - { - if (file_offset_of_buffer_end == file_segment->getDownloadOffset()) - { - read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; - return getRemoteFSReadBuffer(file_segment, read_type); - } - else - { - /// segment{k} - /// cache: [______|___________ - /// ^ - /// download_offset - /// requested_range: [__________] - /// ^ - /// file_offset_of_buffer_end - assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); - bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); - - read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; - return getRemoteFSReadBuffer(file_segment, read_type); - } - } - else - { - download_state = file_segment->state(); - continue; - } - } case FileSegment::State::DOWNLOADING: { size_t download_offset = file_segment->getDownloadOffset(); @@ -280,6 +248,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( read_type = ReadType::CACHED; return getCacheReadBuffer(range.left); } + case FileSegment::State::EMPTY: case FileSegment::State::PARTIALLY_DOWNLOADED: { if (file_segment->getDownloadOffset() > file_offset_of_buffer_end) From c6c60364ae33c644a9aefedb5ad634db006d1550 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 26 May 2022 21:20:27 +0300 Subject: [PATCH 094/150] Remove unused MergeTreeDataMergerMutator::chooseMergeAlgorithm() In favor of MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm() Signed-off-by: Azat Khuzhin --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 40 ------------------- .../MergeTree/MergeTreeDataMergerMutator.h | 9 ----- 2 files changed, 49 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c2c23793580..f596828ed05 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -535,46 +535,6 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( } -MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm( - const MergeTreeData::DataPartsVector & parts, - size_t sum_rows_upper_bound, - const NamesAndTypesList & gathering_columns, - bool deduplicate, - bool need_remove_expired_values, - const MergeTreeData::MergingParams & merging_params) const -{ - const auto data_settings = data.getSettings(); - - if (deduplicate) - return MergeAlgorithm::Horizontal; - if (data_settings->enable_vertical_merge_algorithm == 0) - return MergeAlgorithm::Horizontal; - if (need_remove_expired_values) - return MergeAlgorithm::Horizontal; - - for (const auto & part : parts) - if (!part->supportsVerticalMerge()) - return MergeAlgorithm::Horizontal; - - bool is_supported_storage = - merging_params.mode == MergeTreeData::MergingParams::Ordinary || - 
merging_params.mode == MergeTreeData::MergingParams::Collapsing || - merging_params.mode == MergeTreeData::MergingParams::Replacing || - merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - - bool enough_ordinary_cols = gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; - - bool enough_total_rows = sum_rows_upper_bound >= data_settings->vertical_merge_algorithm_min_rows_to_activate; - - bool no_parts_overflow = parts.size() <= RowSourcePart::MAX_PARTS; - - auto merge_alg = (is_supported_storage && enough_total_rows && enough_ordinary_cols && no_parts_overflow) ? - MergeAlgorithm::Vertical : MergeAlgorithm::Horizontal; - - return merge_alg; -} - - MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index f99f3d2e70d..a5f99c63f11 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -154,15 +154,6 @@ public : ActionBlocker ttl_merges_blocker; private: - - MergeAlgorithm chooseMergeAlgorithm( - const MergeTreeData::DataPartsVector & parts, - size_t rows_upper_bound, - const NamesAndTypesList & gathering_columns, - bool deduplicate, - bool need_remove_expired_values, - const MergeTreeData::MergingParams & merging_params) const; - MergeTreeData & data; const size_t max_tasks_count; From d71780d34ca36200493b06e44ddac688263e8e68 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 02:18:38 +0200 Subject: [PATCH 095/150] Fix some tests --- ...insert_without_explicit_database.reference | 4 +- ...s_and_insert_without_explicit_database.sql | 4 +- .../00206_empty_array_to_single.reference | 2 +- tests/queries/0_stateless/00301_csv.reference | 2 +- .../0_stateless/00502_sum_map.reference | 2 +- .../00506_shard_global_in_union.reference | 16 +- ...aving_time_shift_backwards_at_midnight.sql | 2 +- .../00927_asof_join_other_types.reference | 12 +- .../01087_storage_generate.reference | 200 +++++++++--------- .../0_stateless/01087_storage_generate.sql | 5 +- .../01087_table_function_generate.reference | 84 ++++---- .../01087_table_function_generate.sql | 17 +- .../01186_conversion_to_nullable.reference | 8 +- .../01280_min_map_max_map.reference | 4 +- ..._date_time_best_effort_timestamp.reference | 2 +- .../01440_to_date_monotonicity.reference | 2 +- .../01676_reinterpret_as.reference | 4 +- .../01691_DateTime64_clamp.reference | 10 +- .../01692_DateTime64_from_DateTime.reference | 4 +- .../0_stateless/01699_timezoneOffset.sql | 17 +- ..._toDateTime_from_string_clamping.reference | 2 +- ...re_consistent_datetime64_parsing.reference | 6 +- .../01734_datetime64_from_float.reference | 6 +- .../01772_to_start_of_hour_align.sql | 4 +- .../01852_map_combinator.reference | 2 +- ...68_order_by_fill_with_datetime64.reference | 28 +-- .../01921_datatype_date32.reference | 8 +- .../02096_date_time_1970_saturation.reference | 14 +- .../02184_default_table_engine.reference | 4 +- .../02184_default_table_engine.sql | 4 +- 30 files changed, 238 insertions(+), 241 deletions(-) diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference index 2176028b1c0..788c54e3b42 100644 --- 
a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference @@ -1,5 +1,5 @@ -2014-01-02 0 0 1970-01-01 03:00:00 2014-01-02 03:04:06 -1 2014-01-02 07:04:06 +2014-01-02 0 0 1969-12-31 16:00:00 2014-01-02 03:04:06 +1 2014-01-01 19:04:06 0 0 0 diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index f3130f24521..b05b49ba33a 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -8,8 +8,8 @@ DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; DROP TABLE IF EXISTS test_view_filtered; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('America/Los_Angeles'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('America/Los_Angeles')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) ENGINE = Memory POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/00206_empty_array_to_single.reference b/tests/queries/0_stateless/00206_empty_array_to_single.reference index 1f616bf2f86..248affbb606 100644 --- a/tests/queries/0_stateless/00206_empty_array_to_single.reference +++ b/tests/queries/0_stateless/00206_empty_array_to_single.reference @@ -1,7 +1,7 @@ [1,2] [0] [4,5,6] -[''] ['1970-01-01'] ['1970-01-01 03:00:00'] +[''] ['1970-01-01'] ['1970-01-01 02:00:00'] [0] [''] ['1970-01-01 00:00:00'] ['1970-01-01'] [0] ['0'] ['2015-01-01 00:00:00'] ['2015-01-01'] [0,1] [''] ['2015-01-01 00:00:00','2015-01-01 00:00:01'] ['2015-01-01','2015-01-02'] diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index a9351f91f70..9863da4b640 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -7,7 +7,7 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 -1970-01-02 06:46:39 4 +1970-01-02 05:46:39 4 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index efd5a5534d4..31b067a2bc9 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -16,7 +16,7 @@ ([1],[1]) ([1],[1]) (['a'],[1]) -(['1970-01-01 03:00:01'],[1]) +(['1970-01-01 02:00:01'],[1]) (['1970-01-02'],[1]) (['01234567-89ab-cdef-0123-456789abcdef'],[1]) ([1.01],[1]) diff --git 
a/tests/queries/0_stateless/00506_shard_global_in_union.reference b/tests/queries/0_stateless/00506_shard_global_in_union.reference index 0ba9c36de26..c8fbaaf12a5 100644 --- a/tests/queries/0_stateless/00506_shard_global_in_union.reference +++ b/tests/queries/0_stateless/00506_shard_global_in_union.reference @@ -9,14 +9,14 @@ NOW BAD ==========================: 34 finish ===========================; * A UNION * B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 Event, Datetime A UNION * B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 * A UNION Event, Datetime B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 Event, Datetime A UNION Event, Datetime B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 diff --git a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql index 1fdd9b20b2b..4244ce2039b 100644 --- a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql +++ b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql @@ -1,3 +1,3 @@ -- concat with empty string to defeat injectiveness of toString assumption. -SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Asia/Istanbul') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; +SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Europe/Moscow') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; SELECT concat('', toString(toDateTime('2018-09-19 00:00:00', 'Asia/Tehran') + INTERVAL number * 300 SECOND)) AS k FROM numbers(1000) GROUP BY k HAVING count() > 1 ORDER BY k; diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference index 83ee534ff91..80c85ec1ae3 100644 --- a/tests/queries/0_stateless/00927_asof_join_other_types.reference +++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -10,9 +10,9 @@ 2 1 1 0 2 3 3 3 2 5 5 3 -2 1970-01-01 03:00:01 1 0 -2 1970-01-01 03:00:03 3 3 -2 1970-01-01 03:00:05 5 3 +2 1970-01-01 02:00:01 1 0 +2 1970-01-01 02:00:03 3 3 +2 1970-01-01 02:00:05 5 3 2 1 1 0 2 3 3 3 2 5 5 3 @@ -22,6 +22,6 @@ 2 1 1 0 2 3 3 3 2 5 5 3 -2 1970-01-01 03:00:00.001 1 0 -2 1970-01-01 03:00:00.003 3 3 -2 1970-01-01 03:00:00.005 5 3 +2 1970-01-01 02:00:00.001 1 0 +2 1970-01-01 02:00:00.003 3 3 +2 1970-01-01 02:00:00.005 5 3 diff --git a/tests/queries/0_stateless/01087_storage_generate.reference b/tests/queries/0_stateless/01087_storage_generate.reference index 78c6784f7d2..00ea8ac914f 100644 --- a/tests/queries/0_stateless/01087_storage_generate.reference +++ b/tests/queries/0_stateless/01087_storage_generate.reference @@ -1,103 +1,103 @@ 100 - -[] -54259.6828 ('2088-03-01 16:26:24.094','d3c2a216-a98c-d56c-7bf7-62de9f264cf4') -[88] 34528.4014 ('2031-12-09 00:40:39.898','9ef777c8-de0e-d25e-e16c-5b624f88523c') -[-1] 121968.7945 ('2060-02-05 09:18:12.011','7655e515-d2ca-2f06-0950-e4f44f69aca7') -[-103,75] -135033.4349 ('2038-12-19 20:38:58.695','86b57d15-292d-2517-9acf-47cd053e7a3a') -[110] -202668.69 ('2009-06-18 01:53:29.808','bc630f78-7d58-0c46-dd4b-27fc35625e96') -[-22,2] 168636.9728 ('2074-09-03 09:20:20.936','7624ce27-9bff-4e9d-3f18-6851a97dd0ca') -[-22,-62] 
-75192.4989 ('2085-10-11 21:51:12.855','a4c4d0ed-f448-244e-1723-ca1bba816f2b') -[-2,-90] 133592.5064 ('2010-10-28 21:18:04.633','8ba9103b-f90c-b49b-38c1-223ae5f42bf7') -[-94,80] 197330.6359 ('2024-03-30 22:08:45.772','83442013-3677-5097-065d-72dfbe8a3506') -[23] 167557.6237 ('2078-07-25 21:54:42.480','be14d98e-5b24-54ee-c959-d24fa9a58fdd') -[46,-10,-63] 185107.1979 ('2040-10-07 06:06:53.504','5ed1fe6a-9313-41d7-4bf9-3948e961509f') -[-107,68] -163781.3045 ('2021-12-21 19:18:58.933','7b634f19-0863-829e-484b-be288aab54a1') -[-35,-116,73] -203577.5379 ('2093-08-01 20:21:09.407','d371bad4-b098-ffdd-f84c-6a02390c2939') -[61] 152284.9386 ('2089-12-20 19:21:33.149','9e8426c1-278a-4d9c-4076-364a95b065e3') -[75] 170968.4171 ('2020-07-17 15:45:31.975','47397a81-bda7-8bd9-59f7-d60e2204fe99') -[-115,93] -173740.5652 ('2098-04-25 22:10:33.327','117e31dd-102e-ee6c-0dbd-0a4203c18ca5') -[-20,4,21] 63834.8685 ('2000-07-08 18:09:40.271','10b0fa48-55a3-755a-4a44-36315ae04c1c') -[-110,117,91] -160640.1506 ('1998-04-18 10:58:04.479','6dfa3a8e-6e65-543c-5f50-1ff45835aa5a') -[62] 63817.7977 ('2043-01-24 02:07:18.972','98b8ef31-4f65-2f8b-1ea7-b1473900099e') -[-2] -175477.0173 ('2007-01-16 07:46:14.781','ec92f616-6e1f-003a-54c6-c5f9118d2f1b') -[] 197663.3035 ('2046-06-30 17:04:56.788','fb3244a4-8af2-104f-2a6f-25a7b7b9a112') -[-24] -174299.4691 ('2058-02-23 14:50:58.839','d63ee868-fa93-bf8b-0264-8ebbceb13e3b') -[95,38] -65083.7371 ('2015-03-10 13:33:16.429','47bd199c-f99e-51ea-84e9-b65cce9d167c') -[91,110,72] 130908.9643 ('2036-03-16 15:17:53.679','0dd4ca31-1e09-d7e0-f3df-60cad3cfa805') -[] 208972.3779 ('2034-03-05 22:29:21.994','1069d77c-dfd2-912e-60b8-3c5b964f7e11') -[-32] 167938.505 ('2093-09-10 20:39:39.050','9d1025b6-2d0c-1d84-dafd-02668eb29270') -[] 153744.6987 ('2088-10-02 11:02:11.024','a88e6cb7-2210-5ce5-6bcf-24afc0eca5b6') -[67] -74220.665 ('2074-12-30 18:43:40.817','68096065-18c8-8aca-fd21-15330ead669d') -[6] 66759.8938 ('2091-09-01 19:07:18.219','bb14f4cc-0b54-9a8c-e835-71333b28c03b') -[-28,-82,9] 168625.3131 ('2002-03-20 21:02:30.321','405bb877-6e28-8b91-cb62-bd82a3fa797c') -[] -19760.167 ('2044-11-08 07:52:03.325','13769348-9e58-0e75-3972-8bbadc150715') -[] 160663.7797 ('2025-04-12 13:17:53.501','e6370321-94f5-97e6-0348-a84e72ff5b42') -[-17,18] 99105.9856 ('1972-05-01 12:23:11.688','02618b9e-97cd-4698-d2e8-3f52f4c5a09a') -[86,77] -116990.3914 ('1981-12-31 05:06:54.198','3ac42bb4-8652-b1a8-10bb-98f0337261f8') -[-109,69,-63] -151527.3587 ('2001-01-17 11:19:56.504','77fe7ee2-f279-2855-bfd2-a7d7cee678cc') -[] -57762.3928 ('1978-08-16 18:47:37.660','ab9a110a-fd8d-3c4c-5a49-34c2005536ce') -[-77] 107274.6407 ('2017-01-12 12:03:02.657','c1ad4f17-cc54-45f3-9410-9c1011653f6d') -[] 107133.641 ('2050-10-05 06:29:27.154','36e576aa-c77f-994e-1925-4a4c40da3a0f') -[] 46672.2176 ('2094-01-21 20:25:39.144','e9ba850d-604e-bc7d-417c-1078e89d4615') -[-87,-122,-65] -86258.4663 ('2081-06-17 03:37:45.498','64795221-9719-7937-b4d2-be5f30065ece') -[-53] -48672.1424 ('1992-06-27 17:27:23.602','7c67bc31-c7bb-6197-fdca-f73329b976f2') -[34] -108954.782 ('2096-07-03 23:06:30.632','9c1b37d7-4ced-9428-a0ae-34c5436b14c4') -[] -168124.2364 ('1987-06-03 06:47:12.945','d1c39af4-f920-5095-b8e2-0f878950167b') -[] -112431.4799 ('2021-07-26 07:04:58.527','da07a72d-7e1f-8890-4c4b-326835d11b39') -[-35,-95,58] -181254.9139 ('2086-11-12 17:17:14.473','22f74d0b-dfc0-3f7a-33f4-8055d8fa7846') -[98,119] 11468.5238 ('2092-02-25 11:07:07.695','a1fb97bf-1885-6715-c233-b88a6cd111e4') -[] 82333.8963 ('1989-11-23 
01:38:57.012','a2b82b5b-8331-555c-579b-de4b0eeb7e81') -[-5,-66,69] 32055.8376 ('2040-12-17 16:49:08.704','4537d25e-a2db-ea9a-8e24-a16ed7e0c6e4') -[81,-84,-24] -210815.2512 ('2047-06-09 13:30:06.922','ac3c5b5f-f977-2830-c398-d10a6076a498') -[84,-105] -175413.7733 ('1998-11-03 04:30:21.191','c535feac-1943-c0a1-23f0-645d5406db24') -[58,31] -335.8512 ('1973-07-09 12:21:10.444','24a7dd3d-2565-1de3-05d9-e45fd8ba7729') -[-49,-47] 177399.2836 ('2049-03-15 15:33:00.190','e4432b9b-61e9-d451-dc87-ae3b9da6fd35') -[] 211525.2349 ('2106-01-11 10:44:18.918','23315435-7132-05b5-5a9b-c2c738433a87') -[45,-95,-39] -15314.9732 ('2055-10-29 13:51:12.182','833b2efa-8c72-f5f6-3040-cb4831e8ceb9') -[] 213384.5774 ('2067-02-10 22:02:42.113','0cd7f438-caa7-0d21-867c-1fdb6d67d797') -[99] -147316.5599 ('2000-05-09 21:37:34.776','a3ea6796-38d5-72ff-910d-8b4300831916') -[] 8828.2471 ('1993-11-30 16:53:22.503','7209213f-38bb-cfed-1955-f1fad5a9577a') -[117,9,-35] -134812.6269 ('2065-09-04 23:47:26.589','d33d0d6f-b9c0-2850-4593-cfc9f1e20a4d') -[-35,-58,-101] -9101.5369 ('2023-08-24 20:56:11.695','87fbe3f9-b1f0-c030-a4c0-8662045923b4') -[-58,87] 122510.9099 ('2019-08-09 17:40:29.849','c1d3a2cc-878f-c2c3-4a0b-10e98cda8b4a') -[4,19,58] -13496.8672 ('2027-05-01 09:11:48.659','8996ae31-d670-cbfe-b735-b16b7c3b3476') -[23,-75,-89] -51218.286 ('2010-06-02 02:49:03.396','d32b8b61-cc3e-31fa-2a2a-abefa60bfcee') -[50] -45297.4315 ('2087-04-15 06:46:08.247','04fe9603-97fc-07a4-6248-0f21e408c884') -[-23,17,63] 89185.9462 ('2065-10-26 08:27:12.817','a5fbf764-70b4-8b65-4a8f-7550abca3859') -[-6] -129925.369 ('2013-11-05 07:44:45.233','11db26b3-e2b5-b9fa-6b0e-79c43a2e67ab') -[-72,-108] 203171.5475 ('2000-01-28 09:34:58.032','14d5399e-7949-20c7-0e47-85e2fce5836c') -[-73,34,-27] 2676.7265 ('2057-10-25 14:37:10.049','00049a92-4350-badb-3764-dd7f019b9b31') -[65,-7] -153472.9461 ('1973-04-12 02:34:41.245','e0a0324d-1552-d11e-f3a5-fbd822d206c5') -[] 81837.7838 ('2041-09-20 20:56:39.712','f7923f2c-e526-1706-79b9-58045d9deaa7') -[-113,8] 173192.6905 ('2066-04-02 09:59:59.356','e3013e5c-92e3-c03c-b57a-e1939e00a1a7') -[107] 9694.1102 ('1984-11-02 13:11:34.034','e973db18-07b7-2117-f3ba-e7002adfa939') -[] -76460.9664 ('2051-02-10 09:54:42.143','b8344c22-9e8a-7052-c644-9c3e5989cdf1') -[59,59,0] 27041.7606 ('2083-02-17 18:21:22.547','4d6b137b-a3e1-f36d-2c0c-c8d718dda388') -[-114] 133673.963 ('2005-10-02 20:34:27.452','04785b75-30e5-af8b-547e-d15bcb7f49fb') -[43] -169861.2 ('2006-12-13 09:26:13.923','cb865d38-d961-d7f9-acbb-583b9f31252f') -[] 197115.2174 ('2060-04-08 04:17:00.488','0f26c4b4-b24c-1fd5-c619-31bcf71a4831') -[-25] -200081.9506 ('2055-12-25 02:30:16.276','0b32ad69-2c84-4269-9718-e3171482878a') -[14,110] -40196.4463 ('2084-08-13 19:37:07.588','ed882071-acba-b3ab-5d77-d79a9544a834') -[-62,-71,-82] -154958.9747 ('2100-07-08 02:32:53.741','7711c7c1-0d22-e302-fc86-61ef5e68db96') -[96,-114,-101] 78910.332 ('2100-07-19 15:02:27.109','756bfd26-c4b3-94b8-e991-c7ab7a833b76') -[49] 80117.2267 ('1970-07-04 03:50:56.748','aebac019-9054-4a77-2ccd-8801fc4a7496') -[] 102078.4801 ('2055-01-07 01:22:33.624','21f2e59a-a1ca-5df3-27fd-aa95456cfbe5') -[-106] -108728.4237 ('2020-05-27 11:56:18.121','6b7b6674-9342-2360-4cc0-f7ef8a2404de') -[] 173213.5631 ('2034-01-18 19:04:16.059','2dc0038d-67c1-f0ee-280b-f3f0f536b01a') -[42] 139872.2503 ('2001-07-16 11:09:28.754','d6487da6-1077-1053-f314-9a1079f5df15') -[] 1107.5244 ('2031-02-26 15:06:00.846','b32bee8f-85b7-3c71-bb24-9a0093e6a08c') -[] 85892.8913 ('2088-04-13 14:54:18.514','84f3b59b-8d23-78a6-3032-91392344584f') 
-[43] -109644.2714 ('1974-07-04 14:45:43.139','cf722ca8-15f5-6fe2-997c-0cf88e95e902') -[] 212557.3762 ('2069-03-03 07:21:08.439','9e676cac-36e6-2962-f7b1-578214f0dfbd') -[-128,55] 80471.0777 ('1970-04-01 18:54:40.257','ca358854-416b-9c95-0b9b-c7fed7bb7cb5') -[-30,-54] -132205.4512 ('2017-12-15 22:54:15.750','3558faa4-2d2f-c533-437f-1e03d3600f1d') -[-116,-72] -91499.667 ('2105-09-23 21:06:17.755','07bb6e47-3234-c268-40d7-332388dc06f8') -[] -201636.5228 ('2085-01-27 07:54:42.717','86c3bdc3-ff0f-1723-07c2-845aa3c02370') -[-103,-39] 44330.7722 ('2064-07-02 11:08:28.068','0869c79d-6bdd-5d2d-a3d1-ffe13f6aa810') -[99] -31035.5391 ('2093-07-26 01:50:23.026','aeb59338-254f-dc09-fbd7-263da415e211') -[101] 157961.4729 ('2036-05-04 02:35:07.845','8b6221a9-8dad-4655-7460-6b3031b06893') -[111] 84732.4403 ('1997-04-06 16:10:18.624','08806a79-59f4-c833-eedc-a200bb851767') -[9,-48] -190491.559 ('2031-11-03 19:47:03.757','914e6166-c96e-e0e4-101a-0bb516cf5a2f') -[-41] -132501.8311 ('2089-11-21 21:38:28.848','6de6cc8d-3c49-641e-fb12-87ed5ecb97b0') -[77] 64903.6579 ('1985-04-17 17:08:03.998','26484b8a-f3f1-587f-7777-bc7a57a689c3') +[] -54259.6828 ('2088-03-01 13:26:24.094','d3c2a216-a98c-d56c-7bf7-62de9f264cf4') +[88] 34528.4014 ('2031-12-08 21:40:39.898','9ef777c8-de0e-d25e-e16c-5b624f88523c') +[-1] 121968.7945 ('2060-02-05 06:18:12.011','7655e515-d2ca-2f06-0950-e4f44f69aca7') +[-103,75] -135033.4349 ('2038-12-19 17:38:58.695','86b57d15-292d-2517-9acf-47cd053e7a3a') +[110] -202668.69 ('2009-06-17 21:53:29.808','bc630f78-7d58-0c46-dd4b-27fc35625e96') +[-22,2] 168636.9728 ('2074-09-03 06:20:20.936','7624ce27-9bff-4e9d-3f18-6851a97dd0ca') +[-22,-62] -75192.4989 ('2085-10-11 18:51:12.855','a4c4d0ed-f448-244e-1723-ca1bba816f2b') +[-2,-90] 133592.5064 ('2010-10-28 17:18:04.633','8ba9103b-f90c-b49b-38c1-223ae5f42bf7') +[-94,80] 197330.6359 ('2024-03-30 19:08:45.772','83442013-3677-5097-065d-72dfbe8a3506') +[23] 167557.6237 ('2078-07-25 18:54:42.480','be14d98e-5b24-54ee-c959-d24fa9a58fdd') +[46,-10,-63] 185107.1979 ('2040-10-07 03:06:53.504','5ed1fe6a-9313-41d7-4bf9-3948e961509f') +[-107,68] -163781.3045 ('2021-12-21 16:18:58.933','7b634f19-0863-829e-484b-be288aab54a1') +[-35,-116,73] -203577.5379 ('2093-08-01 17:21:09.407','d371bad4-b098-ffdd-f84c-6a02390c2939') +[61] 152284.9386 ('2089-12-20 16:21:33.149','9e8426c1-278a-4d9c-4076-364a95b065e3') +[75] 170968.4171 ('2020-07-17 12:45:31.975','47397a81-bda7-8bd9-59f7-d60e2204fe99') +[-115,93] -173740.5652 ('2098-04-25 19:10:33.327','117e31dd-102e-ee6c-0dbd-0a4203c18ca5') +[-20,4,21] 63834.8685 ('2000-07-08 14:09:40.271','10b0fa48-55a3-755a-4a44-36315ae04c1c') +[-110,117,91] -160640.1506 ('1998-04-18 06:58:04.479','6dfa3a8e-6e65-543c-5f50-1ff45835aa5a') +[62] 63817.7977 ('2043-01-23 23:07:18.972','98b8ef31-4f65-2f8b-1ea7-b1473900099e') +[-2] -175477.0173 ('2007-01-16 04:46:14.781','ec92f616-6e1f-003a-54c6-c5f9118d2f1b') +[] 197663.3035 ('2046-06-30 14:04:56.788','fb3244a4-8af2-104f-2a6f-25a7b7b9a112') +[-24] -174299.4691 ('2058-02-23 11:50:58.839','d63ee868-fa93-bf8b-0264-8ebbceb13e3b') +[95,38] -65083.7371 ('2015-03-10 10:33:16.429','47bd199c-f99e-51ea-84e9-b65cce9d167c') +[91,110,72] 130908.9643 ('2036-03-16 12:17:53.679','0dd4ca31-1e09-d7e0-f3df-60cad3cfa805') +[] 208972.3779 ('2034-03-05 19:29:21.994','1069d77c-dfd2-912e-60b8-3c5b964f7e11') +[-32] 167938.505 ('2093-09-10 17:39:39.050','9d1025b6-2d0c-1d84-dafd-02668eb29270') +[] 153744.6987 ('2088-10-02 08:02:11.024','a88e6cb7-2210-5ce5-6bcf-24afc0eca5b6') +[67] -74220.665 ('2074-12-30 
15:43:40.817','68096065-18c8-8aca-fd21-15330ead669d') +[6] 66759.8938 ('2091-09-01 16:07:18.219','bb14f4cc-0b54-9a8c-e835-71333b28c03b') +[-28,-82,9] 168625.3131 ('2002-03-20 18:02:30.321','405bb877-6e28-8b91-cb62-bd82a3fa797c') +[] -19760.167 ('2044-11-08 04:52:03.325','13769348-9e58-0e75-3972-8bbadc150715') +[] 160663.7797 ('2025-04-12 10:17:53.501','e6370321-94f5-97e6-0348-a84e72ff5b42') +[-17,18] 99105.9856 ('1972-05-01 09:23:11.688','02618b9e-97cd-4698-d2e8-3f52f4c5a09a') +[86,77] -116990.3914 ('1981-12-31 02:06:54.198','3ac42bb4-8652-b1a8-10bb-98f0337261f8') +[-109,69,-63] -151527.3587 ('2001-01-17 08:19:56.504','77fe7ee2-f279-2855-bfd2-a7d7cee678cc') +[] -57762.3928 ('1978-08-16 15:47:37.660','ab9a110a-fd8d-3c4c-5a49-34c2005536ce') +[-77] 107274.6407 ('2017-01-12 09:03:02.657','c1ad4f17-cc54-45f3-9410-9c1011653f6d') +[] 107133.641 ('2050-10-05 03:29:27.154','36e576aa-c77f-994e-1925-4a4c40da3a0f') +[] 46672.2176 ('2094-01-21 17:25:39.144','e9ba850d-604e-bc7d-417c-1078e89d4615') +[-87,-122,-65] -86258.4663 ('2081-06-17 00:37:45.498','64795221-9719-7937-b4d2-be5f30065ece') +[-53] -48672.1424 ('1992-06-27 13:27:23.602','7c67bc31-c7bb-6197-fdca-f73329b976f2') +[34] -108954.782 ('2096-07-03 20:06:30.632','9c1b37d7-4ced-9428-a0ae-34c5436b14c4') +[] -168124.2364 ('1987-06-03 02:47:12.945','d1c39af4-f920-5095-b8e2-0f878950167b') +[] -112431.4799 ('2021-07-26 04:04:58.527','da07a72d-7e1f-8890-4c4b-326835d11b39') +[-35,-95,58] -181254.9139 ('2086-11-12 14:17:14.473','22f74d0b-dfc0-3f7a-33f4-8055d8fa7846') +[98,119] 11468.5238 ('2092-02-25 08:07:07.695','a1fb97bf-1885-6715-c233-b88a6cd111e4') +[] 82333.8963 ('1989-11-22 22:38:57.012','a2b82b5b-8331-555c-579b-de4b0eeb7e81') +[-5,-66,69] 32055.8376 ('2040-12-17 13:49:08.704','4537d25e-a2db-ea9a-8e24-a16ed7e0c6e4') +[81,-84,-24] -210815.2512 ('2047-06-09 10:30:06.922','ac3c5b5f-f977-2830-c398-d10a6076a498') +[84,-105] -175413.7733 ('1998-11-03 01:30:21.191','c535feac-1943-c0a1-23f0-645d5406db24') +[58,31] -335.8512 ('1973-07-09 09:21:10.444','24a7dd3d-2565-1de3-05d9-e45fd8ba7729') +[-49,-47] 177399.2836 ('2049-03-15 12:33:00.190','e4432b9b-61e9-d451-dc87-ae3b9da6fd35') +[] 211525.2349 ('2106-01-11 07:44:18.918','23315435-7132-05b5-5a9b-c2c738433a87') +[45,-95,-39] -15314.9732 ('2055-10-29 10:51:12.182','833b2efa-8c72-f5f6-3040-cb4831e8ceb9') +[] 213384.5774 ('2067-02-10 19:02:42.113','0cd7f438-caa7-0d21-867c-1fdb6d67d797') +[99] -147316.5599 ('2000-05-09 17:37:34.776','a3ea6796-38d5-72ff-910d-8b4300831916') +[] 8828.2471 ('1993-11-30 13:53:22.503','7209213f-38bb-cfed-1955-f1fad5a9577a') +[117,9,-35] -134812.6269 ('2065-09-04 20:47:26.589','d33d0d6f-b9c0-2850-4593-cfc9f1e20a4d') +[-35,-58,-101] -9101.5369 ('2023-08-24 17:56:11.695','87fbe3f9-b1f0-c030-a4c0-8662045923b4') +[-58,87] 122510.9099 ('2019-08-09 14:40:29.849','c1d3a2cc-878f-c2c3-4a0b-10e98cda8b4a') +[4,19,58] -13496.8672 ('2027-05-01 06:11:48.659','8996ae31-d670-cbfe-b735-b16b7c3b3476') +[23,-75,-89] -51218.286 ('2010-06-01 22:49:03.396','d32b8b61-cc3e-31fa-2a2a-abefa60bfcee') +[50] -45297.4315 ('2087-04-15 03:46:08.247','04fe9603-97fc-07a4-6248-0f21e408c884') +[-23,17,63] 89185.9462 ('2065-10-26 05:27:12.817','a5fbf764-70b4-8b65-4a8f-7550abca3859') +[-6] -129925.369 ('2013-11-05 03:44:45.233','11db26b3-e2b5-b9fa-6b0e-79c43a2e67ab') +[-72,-108] 203171.5475 ('2000-01-28 06:34:58.032','14d5399e-7949-20c7-0e47-85e2fce5836c') +[-73,34,-27] 2676.7265 ('2057-10-25 11:37:10.049','00049a92-4350-badb-3764-dd7f019b9b31') +[65,-7] -153472.9461 ('1973-04-11 
23:34:41.245','e0a0324d-1552-d11e-f3a5-fbd822d206c5') +[] 81837.7838 ('2041-09-20 17:56:39.712','f7923f2c-e526-1706-79b9-58045d9deaa7') +[-113,8] 173192.6905 ('2066-04-02 06:59:59.356','e3013e5c-92e3-c03c-b57a-e1939e00a1a7') +[107] 9694.1102 ('1984-11-02 10:11:34.034','e973db18-07b7-2117-f3ba-e7002adfa939') +[] -76460.9664 ('2051-02-10 06:54:42.143','b8344c22-9e8a-7052-c644-9c3e5989cdf1') +[59,59,0] 27041.7606 ('2083-02-17 15:21:22.547','4d6b137b-a3e1-f36d-2c0c-c8d718dda388') +[-114] 133673.963 ('2005-10-02 16:34:27.452','04785b75-30e5-af8b-547e-d15bcb7f49fb') +[43] -169861.2 ('2006-12-13 06:26:13.923','cb865d38-d961-d7f9-acbb-583b9f31252f') +[] 197115.2174 ('2060-04-08 01:17:00.488','0f26c4b4-b24c-1fd5-c619-31bcf71a4831') +[-25] -200081.9506 ('2055-12-24 23:30:16.276','0b32ad69-2c84-4269-9718-e3171482878a') +[14,110] -40196.4463 ('2084-08-13 16:37:07.588','ed882071-acba-b3ab-5d77-d79a9544a834') +[-62,-71,-82] -154958.9747 ('2100-07-07 23:32:53.741','7711c7c1-0d22-e302-fc86-61ef5e68db96') +[96,-114,-101] 78910.332 ('2100-07-19 12:02:27.109','756bfd26-c4b3-94b8-e991-c7ab7a833b76') +[49] 80117.2267 ('1970-07-04 00:50:56.748','aebac019-9054-4a77-2ccd-8801fc4a7496') +[] 102078.4801 ('2055-01-06 22:22:33.624','21f2e59a-a1ca-5df3-27fd-aa95456cfbe5') +[-106] -108728.4237 ('2020-05-27 08:56:18.121','6b7b6674-9342-2360-4cc0-f7ef8a2404de') +[] 173213.5631 ('2034-01-18 16:04:16.059','2dc0038d-67c1-f0ee-280b-f3f0f536b01a') +[42] 139872.2503 ('2001-07-16 07:09:28.754','d6487da6-1077-1053-f314-9a1079f5df15') +[] 1107.5244 ('2031-02-26 12:06:00.846','b32bee8f-85b7-3c71-bb24-9a0093e6a08c') +[] 85892.8913 ('2088-04-13 11:54:18.514','84f3b59b-8d23-78a6-3032-91392344584f') +[43] -109644.2714 ('1974-07-04 11:45:43.139','cf722ca8-15f5-6fe2-997c-0cf88e95e902') +[] 212557.3762 ('2069-03-03 04:21:08.439','9e676cac-36e6-2962-f7b1-578214f0dfbd') +[-128,55] 80471.0777 ('1970-04-01 15:54:40.257','ca358854-416b-9c95-0b9b-c7fed7bb7cb5') +[-30,-54] -132205.4512 ('2017-12-15 19:54:15.750','3558faa4-2d2f-c533-437f-1e03d3600f1d') +[-116,-72] -91499.667 ('2105-09-23 18:06:17.755','07bb6e47-3234-c268-40d7-332388dc06f8') +[] -201636.5228 ('2085-01-27 04:54:42.717','86c3bdc3-ff0f-1723-07c2-845aa3c02370') +[-103,-39] 44330.7722 ('2064-07-02 08:08:28.068','0869c79d-6bdd-5d2d-a3d1-ffe13f6aa810') +[99] -31035.5391 ('2093-07-25 22:50:23.026','aeb59338-254f-dc09-fbd7-263da415e211') +[101] 157961.4729 ('2036-05-03 23:35:07.845','8b6221a9-8dad-4655-7460-6b3031b06893') +[111] 84732.4403 ('1997-04-06 12:10:18.624','08806a79-59f4-c833-eedc-a200bb851767') +[9,-48] -190491.559 ('2031-11-03 16:47:03.757','914e6166-c96e-e0e4-101a-0bb516cf5a2f') +[-41] -132501.8311 ('2089-11-21 18:38:28.848','6de6cc8d-3c49-641e-fb12-87ed5ecb97b0') +[77] 64903.6579 ('1985-04-17 13:08:03.998','26484b8a-f3f1-587f-7777-bc7a57a689c3') - diff --git a/tests/queries/0_stateless/01087_storage_generate.sql b/tests/queries/0_stateless/01087_storage_generate.sql index 7df9f3931d0..a9320791816 100644 --- a/tests/queries/0_stateless/01087_storage_generate.sql +++ b/tests/queries/0_stateless/01087_storage_generate.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS test_table; -CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(); +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE = GenerateRandom(); SELECT COUNT(*) FROM (SELECT * FROM test_table LIMIT 100); DROP TABLE IF EXISTS test_table; @@ -7,11 +7,10 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; 
-CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) ENGINE=GenerateRandom(10, 5, 3); +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'UTC'), UUID)) ENGINE = GenerateRandom(10, 5, 3); SELECT * FROM test_table_2 LIMIT 100; SELECT '-'; DROP TABLE IF EXISTS test_table_2; - diff --git a/tests/queries/0_stateless/01087_table_function_generate.reference b/tests/queries/0_stateless/01087_table_function_generate.reference index ef7eac41ca2..d62ff5618fc 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/tests/queries/0_stateless/01087_table_function_generate.reference @@ -46,29 +46,29 @@ h \N o - -Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') -2113-06-12 2050-12-17 02:46:35 2096-02-16 22:18:22 -2141-08-09 2013-10-17 23:35:26 1976-01-24 12:52:48 -2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 -1997-04-11 1972-09-18 23:44:08 2040-07-10 14:46:42 -2103-11-03 2044-11-23 20:57:12 1970-10-09 02:30:14 -2066-11-19 2029-12-10 03:13:55 2106-01-30 21:52:44 -2064-08-14 2016-07-14 11:33:45 2096-12-12 00:40:50 -2046-09-13 2085-07-10 18:51:14 2096-01-15 16:31:33 -2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 -2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 +Date DateTime(\'UTC\') DateTime(\'UTC\') +2113-06-12 2050-12-16 23:46:35 2096-02-16 19:18:22 +2141-08-09 2013-10-17 19:35:26 1976-01-24 09:52:48 +2039-08-16 1974-11-17 20:22:46 1980-03-04 18:02:50 +1997-04-11 1972-09-18 20:44:08 2040-07-10 11:46:42 +2103-11-03 2044-11-23 17:57:12 1970-10-08 23:30:14 +2066-11-19 2029-12-10 00:13:55 2106-01-30 18:52:44 +2064-08-14 2016-07-14 08:33:45 2096-12-11 21:40:50 +2046-09-13 2085-07-10 15:51:14 2096-01-15 13:31:33 +2008-03-16 2047-05-16 20:28:36 2103-02-11 13:44:39 +2000-07-07 2105-07-19 16:29:06 1980-01-02 02:18:22 - -DateTime64(3, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') -1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 -1978-08-25 17:07:25.427 2034-05-02 20:49:42.148578 2015-08-26 15:26:31.783160 -2037-04-04 10:50:56.898 2055-05-28 11:12:48.819271 2068-12-26 09:58:49.635722 -2041-09-02 07:07:24.891 2051-08-01 14:15:40.218654 2081-10-19 15:55:40.057084 -1976-07-15 23:59:41.974 2075-01-29 20:34:10.425321 1996-12-31 10:51:28.562331 -1974-11-03 08:09:51.992 2010-04-19 04:09:03.451487 1994-05-15 15:42:53.162162 -2061-10-11 20:14:02.729 1981-07-22 10:13:45.729103 2084-05-27 08:59:37.746021 -1989-12-13 02:01:16.532 1992-10-05 07:07:57.973222 2037-10-24 18:53:50.985504 -1992-12-28 12:26:04.030 1971-07-29 09:20:38.230976 1980-03-26 18:49:55.428516 -2051-12-11 10:09:13.162 1982-01-12 03:25:45.754492 2010-05-17 11:01:28.452864 +DateTime64(3, \'UTC\') DateTime64(6, \'UTC\') DateTime64(6, \'UTC\') +1978-06-07 20:50:57.320 2013-08-28 06:21:54.010758 1991-08-25 13:23:26.140215 +1978-08-25 14:07:25.427 2034-05-02 17:49:42.148578 2015-08-26 12:26:31.783160 +2037-04-04 07:50:56.898 2055-05-28 08:12:48.819271 2068-12-26 06:58:49.635722 +2041-09-02 04:07:24.891 2051-08-01 11:15:40.218654 2081-10-19 12:55:40.057084 +1976-07-15 20:59:41.974 2075-01-29 17:34:10.425321 1996-12-31 07:51:28.562331 +1974-11-03 05:09:51.992 2010-04-19 00:09:03.451487 1994-05-15 11:42:53.162162 +2061-10-11 17:14:02.729 1981-07-22 06:13:45.729103 2084-05-27 05:59:37.746021 +1989-12-12 23:01:16.532 1992-10-05 04:07:57.973222 2037-10-24 15:53:50.985504 +1992-12-28 09:26:04.030 1971-07-29 06:20:38.230976 1980-03-26 15:49:55.428516 +2051-12-11 07:09:13.162 
1982-01-12 00:25:45.754492 2010-05-17 07:01:28.452864 Date32 1934-01-06 2039-08-16 @@ -225,25 +225,25 @@ U6 \'%Y~t9 RL,{Xs\\tw - -[] -27467.1221 ('2021-03-08 03:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') -[] 204013.7193 ('2026-05-05 05:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') -[-122] -9432.2617 ('2001-08-23 08:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') -[-30,61] -133488.2399 ('2048-05-14 09:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') -[-1] 58720.0591 ('1976-06-07 23:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') -[1] -18736.7874 ('1977-03-10 04:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') -[34,-10] -99367.9009 ('2031-05-08 10:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') -[110] 31562.7502 ('2045-02-27 11:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') -[114] -84125.1554 ('2023-06-06 06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') -[124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') +[] -27467.1221 ('2021-03-08 00:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') +[] 204013.7193 ('2026-05-05 02:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') +[-122] -9432.2617 ('2001-08-23 04:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') +[-30,61] -133488.2399 ('2048-05-14 06:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') +[-1] 58720.0591 ('1976-06-07 20:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') +[1] -18736.7874 ('1977-03-10 01:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') +[34,-10] -99367.9009 ('2031-05-08 07:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') +[110] 31562.7502 ('2045-02-27 08:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') +[114] -84125.1554 ('2023-06-06 03:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') +[124] -114719.5228 ('2010-11-11 19:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') - -[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743 -[-102,-118] 392272782 Eb -14818.02 o -2.664492247169164e59 ('2082-12-26','2052-09-09 06:50:50','2088-04-21 05:07:08.245','aeb9c26e-0ee7-2b8e-802b-2a96319b8e60') CBF4 -[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2108-04-19','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D -[-28,100] 3675466147 { -146685.1749 h 3.6676044396877755e142 ('2017-10-25','2100-02-28 18:07:18','2055-10-14 06:36:20.056','14949dae-dfa8-a124-af83-887348b2f609') 6D88 -[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2141-04-06','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43 -[11,-36] 3308237300 \N 171205.1896 \N 5.634708707075817e195 ('1974-10-31','1993-12-24 09:38:45','2038-07-15 05:22:51.805','63d999b8-8cca-e237-c4a4-4dd7d0096f65') 609E -[39] 1614362420 `4A8P 157144.063 o -1.1843143253872814e-255 ('2147-08-18','2072-09-28 18:27:27','2073-07-10 12:19:58.146','6483f5c0-8733-364c-4fa0-9948d32e8903') A886 -[48,-120] 3848918261 1 Date: Fri, 27 May 2022 02:20:16 +0200 Subject: [PATCH 096/150] Fix more tests --- .../0_stateless/00945_bloom_filter_index.sql | 18 +++++++++--------- .../01414_low_cardinality_nullable.sql | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index c06f8e87173..fc18a4a4dc5 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ 
b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -42,7 +42,7 @@ SELECT COUNT() FROM bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_r SELECT COUNT() FROM bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; @@ -68,7 +68,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -83,7 +83,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -98,7 +98,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -121,7 +121,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() 
FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; @@ -219,7 +219,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -234,7 +234,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -249,7 +249,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -279,7 +279,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:01:40', 'Asia/Istanbul')); SELECT COUNT() FROM 
bloom_filter_array_lc_null_types_test WHERE has(str, '100'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 871d74d7fb9..2d3d31e9b5c 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -140,7 +140,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 1); SELECT count() FROM lc_nullable WHERE has(f32, 1); SELECT count() FROM lc_nullable WHERE has(f64, 1); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-02')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '1'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('1', 5)); @@ -168,7 +168,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 5); SELECT count() FROM lc_nullable WHERE has(f32, 5); SELECT count() FROM lc_nullable WHERE has(f64, 5); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-06')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '5'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('5', 5)); @@ -183,7 +183,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 10); SELECT count() FROM lc_nullable WHERE has(f32, 10); SELECT count() FROM lc_nullable WHERE has(f64, 10); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '10'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('10', 5)); @@ -213,7 +213,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 100); SELECT count() FROM lc_nullable WHERE has(f32, 100); SELECT count() FROM lc_nullable WHERE has(f64, 100); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-04-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:01:40', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '100'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('100', 5)); From 393846e1eed2c90ff98134254b94a3721642a73b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 04:08:04 +0200 Subject: [PATCH 097/150] Fix test --- tests/queries/0_stateless/01699_timezoneOffset.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference index 860829f0ce6..a1cc6391e6f 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.reference +++ b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -1,4 +1,4 @@ -DST boundary test for Asia/Istanbul: +DST boundary test for Europe/Moscow: 0 1981-04-01 22:40:00 14400 
354998400 1 1981-04-01 22:50:00 14400 354999000 2 1981-04-01 23:00:00 14400 354999600 @@ -70,7 +70,7 @@ DST boundary test for Australia/Lord_Howe: 15 2019-04-07 03:00:00 37800 1554568200 16 2019-04-07 03:10:00 37800 1554568800 17 2019-04-07 03:20:00 37800 1554569400 -4 days test in batch comparing with manually computation result for Asia/Istanbul: +4 days test in batch comparing with manually computation result for Europe/Moscow: 4 days test in batch comparing with manually computation result for Asia/Tehran: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: From 841858ec303abeec27c86282dccc010cfceaa1ff Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:13:36 +0200 Subject: [PATCH 098/150] Revert "Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"" --- src/Storages/MergeTree/DataPartsExchange.cpp | 41 +++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 620466b8035..f6d53979663 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -470,29 +470,28 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - PooledReadWriteBufferFromHTTP in{ + std::unique_ptr in = std::make_unique( uri, Poco::Net::HTTPRequest::HTTP_POST, - {}, + nullptr, timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - data_settings->replicated_max_parallel_fetches_for_host - }; + static_cast(data_settings->replicated_max_parallel_fetches_for_host)); - int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); + int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - readBinary(sum_files_size, in); + readBinary(sum_files_size, *in); if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; - readBinary(ttl_infos_string, in); + readBinary(ttl_infos_string, *in); ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -529,13 +528,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( String part_type = "Wide"; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, in); + readStringBinary(part_type, *in); UUID part_uuid = UUIDHelpers::Nil; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, in); + readUUIDText(part_uuid, *in); - String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); if (!remote_fs_metadata.empty()) { if (!try_zero_copy) @@ -549,7 +548,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( try { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, *in, throttler); } catch (const Exception & e) { @@ -557,6 +556,18 @@ MergeTreeData::MutableDataPartPtr 
Fetcher::fetchPart( throw; LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); + + /// It's important to release session from HTTP pool. Otherwise it's possible to get deadlock + /// on http pool. + try + { + in.reset(); + } + catch (...) + { + tryLogCurrentException(log); + } + /// Try again but without zero-copy return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); @@ -570,16 +581,16 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( part_info.partition_id, part_name, new_part_path, replica_path, uri, to_detached, sum_files_size); - in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + in->setNextCallback(ReplicatedFetchReadCallback(*entry)); size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, in); + readBinary(projections, *in); MergeTreeData::DataPart::Checksums checksums; return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); + ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, *in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, *in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From 461bb42fb8fd79146224a6ba8cd59e619a57ea6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:40:07 +0200 Subject: [PATCH 099/150] Fix flaky test --- .../test_replicated_merge_tree_hdfs_zero_copy/test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 23f465eaabd..7d65bed3901 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -4,6 +4,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry from pyhdfs import HdfsClient @@ -264,8 +265,8 @@ def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): node1.query("OPTIMIZE TABLE ttl_move_test FINAL") node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) - assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" - assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" + assert_eq_with_retry(node1, "SELECT count() FROM ttl_move_test", "2") + assert_eq_with_retry(node2, "SELECT count() FROM ttl_move_test", "2") assert ( node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") == "(10),(11)" @@ -299,8 +300,9 @@ def test_hdfs_zero_copy_with_ttl_delete(cluster): node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) - assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" - assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" + assert_eq_with_retry(node1, "SELECT count() FROM ttl_delete_test", "1") + assert_eq_with_retry(node2, "SELECT count() FROM ttl_delete_test", "1") + assert ( node1.query("SELECT id FROM 
ttl_delete_test ORDER BY id FORMAT Values") == "(11)" From c79600c4c8568e5bedd772e4d2a682d1082a4d59 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:44:29 +0200 Subject: [PATCH 100/150] Fix build --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f6d53979663..d6acf909c1e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -519,7 +519,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); From d68c30a92e7d4714c6f2ba39284bc7f8967a050e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 27 May 2022 12:27:25 +0000 Subject: [PATCH 101/150] fix tests --- tests/queries/0_stateless/02306_part_types_profile_events.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql index fd6178941f2..4b13504612e 100644 --- a/tests/queries/0_stateless/02306_part_types_profile_events.sql +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -29,7 +29,7 @@ SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['Inse AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; SELECT count(), sum(ProfileEvents['MergedIntoWideParts']), sum(ProfileEvents['MergedIntoCompactParts']) - FROM system.query_log WHERE has(databases, currentDatabase()) + FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02306_part_types_profile_events' AND query ILIKE 'OPTIMIZE TABLE%' AND type = 'QueryFinish'; @@ -39,6 +39,6 @@ SELECT part_type FROM system.part_log WHERE database = currentDatabase() SELECT part_type, count() > 0 FROM system.part_log WHERE database = currentDatabase() AND table = 't_parts_profile_events' AND event_type = 'MergeParts' - GROUP BY part_type; + GROUP BY part_type ORDER BY part_type; DROP TABLE t_parts_profile_events; From 735d2dfebd390c42a27554f85d9ccc87768ffc94 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 27 May 2022 13:55:20 +0300 Subject: [PATCH 102/150] tests: fix 01317_no_password_in_command_line flakiness (and make it race free) Before it was possible not to check if the query was gone already, also it checks all processes not only the process of the client for the possible password. 
v2: make it parallel aware Signed-off-by: Azat Khuzhin --- ...1317_no_password_in_command_line.reference | 2 - .../01317_no_password_in_command_line.sh | 72 ++++++++----------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.reference b/tests/queries/0_stateless/01317_no_password_in_command_line.reference index aa47d0d46d4..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01317_no_password_in_command_line.reference +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.reference @@ -1,2 +0,0 @@ -0 -0 diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.sh b/tests/queries/0_stateless/01317_no_password_in_command_line.sh index c9886aca31e..5b95f077ea2 100755 --- a/tests/queries/0_stateless/01317_no_password_in_command_line.sh +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.sh @@ -7,52 +7,40 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS user" -$CLICKHOUSE_CLIENT --query "CREATE USER user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" - -# False positive result due to race condition with sleeps is Ok. - -$CLICKHOUSE_CLIENT --user user --password hello --query "SELECT sleep(1)" & -bg_query=$! +user=user_$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS $user" +$CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" +trap '$CLICKHOUSE_CLIENT --query "DROP USER $user"' EXIT # Wait for query to start executing. At that time, the password should be cleared. -for _ in {1..20} -do - if $CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" | grep -q 'SELECT sleep(1)' - then - break - fi +function wait_query_pid() +{ + local query_id=$1 && shift - if ! kill -0 -- $bg_query 2>/dev/null - then - # The SELECT sleep(1) query finished earlier that we could grep for it in the process list, but it should have run for at least one second. It is Ok. - break - fi -done + for _ in {1..20}; do + if [ "$($CLICKHOUSE_CLIENT --param_query_id "$query_id" --query "SELECT count() FROM system.processes WHERE query_id = {query_id:String}")" -eq 1 ]; then + break + fi + sleep 0.3 + done +} -ps auxw | grep -F -- '--password' | grep -F hello ||: -wait - -# Once again with different syntax -$CLICKHOUSE_CLIENT --user user --password=hello --query "SELECT sleep(1)" & +# --password +query_id=first-$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query_id "$query_id" --user "$user" --password hello --max_block_size 1 --query "SELECT sleepEachRow(1) FROM system.numbers LIMIT 100" >& /dev/null & bg_query=$! - -# Wait for query to start executing. At that time, the password should be cleared. -for _ in {1..20} -do - if $CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" | grep -q 'SELECT sleep(1)' - then - break - fi - - if ! kill -0 -- $bg_query 2>/dev/null - then - # The SELECT sleep(1) query finished earlier that we could grep for it in the process list, but it should have run for at least one second. It is Ok. 
- break - fi -done - -ps auxw | grep -F -- '--password' | grep -F hello ||: +wait_query_pid "$query_id" +ps u --no-header $bg_query | grep -F -- '--password' | grep -F hello ||: +grep -F -- '--password' < "/proc/$bg_query/comm" | grep -F hello ||: +$CLICKHOUSE_CLIENT --format Null --param_query_id "$query_id" -q "KILL QUERY WHERE query_id = {query_id:String} SYNC" wait -$CLICKHOUSE_CLIENT --query "DROP USER user" +# --password= +query_id=second-$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query_id "$query_id" --user "$user" --password=hello --max_block_size 1 --query "SELECT sleepEachRow(1) FROM system.numbers LIMIT 100" >& /dev/null & +bg_query=$! +wait_query_pid "$query_id" +ps u --no-header $bg_query | grep -F -- '--password' | grep -F hello ||: +grep -F -- '--password' < "/proc/$bg_query/comm" | grep -F hello ||: +$CLICKHOUSE_CLIENT --format Null --param_query_id "$query_id" -q "KILL QUERY WHERE query_id = {query_id:String} SYNC" +wait From abc90fad8dba3521a29d044cadad599d9336dcec Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 27 May 2022 12:42:51 +0000 Subject: [PATCH 103/150] fix WITH FILL with negative itervals --- src/Processors/Transforms/FillingTransform.cpp | 4 ++-- tests/queries/0_stateless/02112_with_fill_interval.reference | 3 +++ tests/queries/0_stateless/02112_with_fill_interval.sql | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 9e5d57a2b43..a41b5660e0d 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -90,9 +90,9 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & if (which.isDate() || which.isDate32()) { Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); - if (avg_seconds < 86400) + if (std::abs(avg_seconds) < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds); + "Value of step is to low ({} seconds). 
Must be >= 1 day", std::abs(avg_seconds)); } if (which.isDate()) diff --git a/tests/queries/0_stateless/02112_with_fill_interval.reference b/tests/queries/0_stateless/02112_with_fill_interval.reference index fc6f9378bfa..4bb99803eb1 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.reference +++ b/tests/queries/0_stateless/02112_with_fill_interval.reference @@ -107,3 +107,6 @@ 2020-05-01 2 0 2020-05-01 3 0 2020-05-01 4 0 +1970-01-04 +1970-01-03 +1970-01-02 diff --git a/tests/queries/0_stateless/02112_with_fill_interval.sql b/tests/queries/0_stateless/02112_with_fill_interval.sql index f26ec7da8c9..d2416f9a84b 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.sql +++ b/tests/queries/0_stateless/02112_with_fill_interval.sql @@ -79,3 +79,6 @@ d WITH FILL id WITH FILL FROM 1 TO 5; DROP TABLE with_fill_date; + +SELECT d FROM (SELECT toDate(1) AS d) +ORDER BY d DESC WITH FILL FROM toDate(3) TO toDate(0) STEP INTERVAL -1 DAY; From f8762667b0d799e5068dfe15737eb6b0b3a9f8d5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 12:37:11 +0000 Subject: [PATCH 104/150] Use jepsen CI directly in PR workflow --- .github/workflows/jepsen.yml | 5 +---- .github/workflows/pull_request.yml | 8 ++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 9b7c4e63d48..1682cd1e812 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -7,11 +7,8 @@ concurrency: on: # yamllint disable-line rule:truthy schedule: - cron: '0 */6 * * *' - workflow_run: - workflows: ["PullRequestCI"] - types: - - completed workflow_dispatch: + workflow_call: jobs: KeeperJepsenRelease: runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01490dff59e..01fbcd42559 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3272,6 +3272,13 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" +############################################################################################# +###################################### JEPSEN TESTS ######################################### +############################################################################################# + Jepsen: + needs: [BuilderBinRelease] + uses: ./.github/workflows/jepsen.yml + FinishCheck: needs: - StyleCheck @@ -3336,6 +3343,7 @@ jobs: - SplitBuildSmokeTest - CompatibilityCheck - IntegrationTestsFlakyCheck + - Jepsen runs-on: [self-hosted, style-checker] steps: - name: Clear repository From fc3d39629e66cf0f32f5f8281fae0bd9578016bb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 12:48:11 +0000 Subject: [PATCH 105/150] Temporarly remove dependancy on FastTest --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01fbcd42559..149a9981203 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -298,7 +298,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" BuilderBinRelease: - needs: [DockerHubPush, FastTest] + needs: [DockerHubPush] runs-on: [self-hosted, builder] steps: - name: Set envs From 30f2c9ad58ef235542391a7b84c12b8d468a9f56 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 13:31:21 +0000 Subject: [PATCH 106/150] Polish workflow --- 
.github/workflows/pull_request.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 149a9981203..0c339c5ed4b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -298,7 +298,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" BuilderBinRelease: - needs: [DockerHubPush] + needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] steps: - name: Set envs @@ -3276,8 +3276,8 @@ jobs: ###################################### JEPSEN TESTS ######################################### ############################################################################################# Jepsen: - needs: [BuilderBinRelease] - uses: ./.github/workflows/jepsen.yml + needs: [BuilderBinRelease] + uses: ./.github/workflows/jepsen.yml # yamllint disable-line FinishCheck: needs: From 2ca5b7812d3719c016141135a37a4a4bfad0736a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 14:15:34 +0000 Subject: [PATCH 107/150] Ignore reusable workflow error --- .github/workflows/pull_request.yml | 2 +- utils/check-style/check-workflows | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0c339c5ed4b..6d56d5e3105 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3277,7 +3277,7 @@ jobs: ############################################################################################# Jepsen: needs: [BuilderBinRelease] - uses: ./.github/workflows/jepsen.yml # yamllint disable-line + uses: ./.github/workflows/jepsen.yml FinishCheck: needs: diff --git a/utils/check-style/check-workflows b/utils/check-style/check-workflows index 6e9cb87ed36..df2292d84ca 100755 --- a/utils/check-style/check-workflows +++ b/utils/check-style/check-workflows @@ -6,4 +6,4 @@ GIT_ROOT=$(git rev-parse --show-cdup) GIT_ROOT=${GIT_ROOT:-.} act --list --directory="$GIT_ROOT" 1>/dev/null 2>&1 || act --list --directory="$GIT_ROOT" 2>&1 -actionlint || : +actionlint -ignore 'reusable workflow call.+' || : From 540353566cb0bdbb954a195a2484d0e0f65fe5aa Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 15:14:10 +0200 Subject: [PATCH 108/150] Added LpNorm and LpDistance functions for arrays --- src/Functions/array/arrayDistance.cpp | 126 +++++++++++++++--- src/Functions/array/arrayNorm.cpp | 114 ++++++++++++---- src/Functions/vectorFunctions.cpp | 36 ++++- .../02282_array_distance.reference | 4 + .../0_stateless/02282_array_distance.sql | 6 + .../0_stateless/02283_array_norm.reference | 27 ++-- .../queries/0_stateless/02283_array_norm.sql | 16 ++- 7 files changed, 262 insertions(+), 67 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 7c1cddf4435..2121189dacb 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -7,12 +7,13 @@ #include #include #include -#include "base/range.h" +#include namespace DB { namespace ErrorCodes { + extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; @@ -22,6 +23,8 @@ struct L1Distance { static inline String name = "L1"; + struct ConstParams {}; + template struct State { @@ -29,13 +32,13 @@ struct L1Distance }; template - static void accumulate(State & state, 
ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.sum += fabs(x - y); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return state.sum; } @@ -45,6 +48,8 @@ struct L2Distance { static inline String name = "L2"; + struct ConstParams {}; + template struct State { @@ -52,22 +57,53 @@ struct L2Distance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.sum += (x - y) * (x - y); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return sqrt(state.sum); } }; +struct LpDistance +{ + static inline String name = "Lp"; + + struct ConstParams + { + Float64 power; + Float64 inverted_power; + }; + + template + struct State + { + FloatType sum = 0; + }; + + template + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams & params) + { + state.sum += std::pow(fabs(x - y), params.power); + } + + template + static ResultType finalize(const State & state, const ConstParams & params) + { + return std::pow(state.sum, params.inverted_power); + } +}; + struct LinfDistance { static inline String name = "Linf"; + struct ConstParams {}; + template struct State { @@ -75,21 +111,24 @@ struct LinfDistance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.dist = fmax(state.dist, fabs(x - y)); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return state.dist; } }; + struct CosineDistance { static inline String name = "Cosine"; + struct ConstParams {}; + template struct State { @@ -99,7 +138,7 @@ struct CosineDistance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.dot_prod += x * y; state.x_squared += x * x; @@ -107,7 +146,7 @@ struct CosineDistance } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return 1 - state.dot_prod / sqrt(state.x_squared * state.y_squared); } @@ -121,17 +160,18 @@ public: String getName() const override { return name; } static FunctionPtr create(ContextPtr) { return std::make_shared>(); } size_t getNumberOfArguments() const override { return 2; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { DataTypes types; - for (const auto & argument : arguments) + for (size_t i = 0; i < 2; ++i) { - const auto * array_type = checkAndGetDataType(argument.type.get()); + const auto * array_type = checkAndGetDataType(arguments[i].type.get()); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array.", i, getName()); 
types.push_back(array_type->getNestedType()); } @@ -221,7 +261,7 @@ private: { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count); \ + return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count, arguments); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -237,15 +277,15 @@ private: } template - ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const + ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { if (typeid_cast(col_x.get())) { - return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count); + return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count, arguments); } else if (typeid_cast(col_y.get())) { - return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count); + return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count, arguments); } col_x = col_x->convertToFullColumnIfConst(); @@ -273,6 +313,8 @@ private: } } + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + auto result = ColumnVector::create(input_rows_count); auto & result_data = result->getData(); @@ -284,9 +326,9 @@ private: typename Kernel::template State state; for (; prev < off; ++prev) { - Kernel::template accumulate(state, data_x[prev], data_y[prev]); + Kernel::template accumulate(state, data_x[prev], data_y[prev], kernel_params); } - result_data[row] = Kernel::finalize(state); + result_data[row] = Kernel::finalize(state, kernel_params); row++; } return result; @@ -294,7 +336,7 @@ private: /// Special case when the 1st parameter is Const template - ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const + ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { col_x = assert_cast(col_x.get())->getDataColumnPtr(); col_y = col_y->convertToFullColumnIfConst(); @@ -322,6 +364,8 @@ private: prev_offset = offsets_y[row]; } + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + auto result = ColumnVector::create(input_rows_count); auto & result_data = result->getData(); @@ -333,19 +377,59 @@ private: typename Kernel::template State state; for (size_t i = 0; prev < off; ++i, ++prev) { - Kernel::template accumulate(state, data_x[i], data_y[prev]); + Kernel::template accumulate(state, data_x[i], data_y[prev], kernel_params); } - result_data[row] = Kernel::finalize(state); + result_data[row] = Kernel::finalize(state, kernel_params); row++; } return result; } + typename Kernel::ConstParams initConstParams(const ColumnsWithTypeAndName &) const { return {}; } }; + +template <> +size_t FunctionArrayDistance::getNumberOfArguments() const { return 3; } + +template <> +ColumnNumbers FunctionArrayDistance::getArgumentsThatAreAlwaysConstant() const { return {2}; } + +template <> +LpDistance::ConstParams FunctionArrayDistance::initConstParams(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 3) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Argument p of function {} was not provided", + getName()); + + if (!arguments[2].column->isNumeric()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument p of function {} must be numeric constant", + getName()); + + if (!isColumnConst(*arguments[2].column) && arguments[2].column->size() != 1) + throw Exception( 
+ ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be either constant Float64 or constant UInt", + getName()); + + Float64 p = arguments[2].column->getFloat64(0); + if (p < 1 || p == HUGE_VAL) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Second argument for function {} must be not less than one and not be an infinity", + getName()); + + return LpDistance::ConstParams{p, 1 / p}; +} + /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayL2Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayLpDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index b3b5aff7063..20807b4a487 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -13,6 +13,7 @@ namespace DB { namespace ErrorCodes { + extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; } @@ -21,14 +22,16 @@ struct L1Norm { static inline String name = "L1"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + fabs(value); } template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -38,32 +41,59 @@ struct L2Norm { static inline String name = "L2"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + value * value; } template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return sqrt(result); } }; +struct LpNorm +{ + static inline String name = "Lp"; + + struct ConstParams + { + Float64 power; + Float64 inverted_power = 1 / power; + }; + + template + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) + { + return result + std::pow(fabs(value), params.power); + } + + template + inline static ResultType finalize(ResultType result, const ConstParams & params) + { + return std::pow(result, params.inverted_power); + } +}; + struct LinfNorm { static inline String name = "Linf"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return fmax(result, fabs(value)); } template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -78,22 +108,17 @@ public: String getName() const override { return name; } static FunctionPtr create(ContextPtr) { return std::make_shared>(); } size_t getNumberOfArguments() const override { return 1; } + ColumnNumbers 
getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - DataTypes types; - for (const auto & argument : arguments) - { - const auto * array_type = checkAndGetDataType(argument.type.get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); + const auto * array_type = checkAndGetDataType(arguments[0].type.get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); - types.push_back(array_type->getNestedType()); - } - const auto & common_type = getLeastSupertype(types); - switch (common_type->getTypeId()) + switch (array_type->getNestedType()->getTypeId()) { case TypeIndex::UInt8: case TypeIndex::UInt16: @@ -111,7 +136,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} has nested type {}. " "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), common_type->getName()); + getName(), array_type->getNestedType()->getName()); } } @@ -125,7 +150,7 @@ public: switch (result_type->getTypeId()) { case TypeIndex::Float64: - return executeWithResultType(*arr, type, input_rows_count); + return executeWithResultType(*arr, type, input_rows_count, arguments); break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); @@ -148,13 +173,13 @@ private: template - ColumnPtr executeWithResultType(const ColumnArray & array, const DataTypePtr & nested_type, size_t input_rows_count) const + ColumnPtr executeWithResultType(const ColumnArray & array, const DataTypePtr & nested_type, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { switch (nested_type->getTypeId()) { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(array, input_rows_count); \ + return executeWithTypes(array, input_rows_count, arguments); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -170,7 +195,7 @@ private: } template - static ColumnPtr executeWithTypes(const ColumnArray & array, size_t input_rows_count) + ColumnPtr executeWithTypes(const ColumnArray & array, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { const auto & data = typeid_cast &>(array.getData()).getData(); const auto & offsets = array.getOffsets(); @@ -178,6 +203,8 @@ private: auto result_col = ColumnVector::create(input_rows_count); auto & result_data = result_col->getData(); + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + ColumnArray::Offset prev = 0; size_t row = 0; for (auto off : offsets) @@ -185,18 +212,59 @@ private: Float64 result = 0; for (; prev < off; ++prev) { - result = Kernel::template accumulate(result, data[prev]); + result = Kernel::template accumulate(result, data[prev], kernel_params); } - result_data[row] = Kernel::finalize(result); + result_data[row] = Kernel::finalize(result, kernel_params); row++; } return result_col; } + + typename Kernel::ConstParams initConstParams(const ColumnsWithTypeAndName &) const { return {}; } }; +template <> +size_t FunctionArrayNorm::getNumberOfArguments() const { return 2; } + +template <> +ColumnNumbers FunctionArrayNorm::getArgumentsThatAreAlwaysConstant() 
const { return {1}; } + +template <> +LpNorm::ConstParams FunctionArrayNorm::initConstParams(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 2) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Argument p of function {} was not provided", + getName()); + + if (!arguments[1].column->isNumeric()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument p of function {} must be numeric constant", + getName()); + + if (!isColumnConst(*arguments[1].column) && arguments[1].column->size() != 1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be either constant Float64 or constant UInt", + getName()); + + Float64 p = arguments[1].column->getFloat64(0); + if (p < 1 || p == HUGE_VAL) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Second argument for function {} must be not less than one and not be an infinity", + getName()); + + return LpNorm::ConstParams{p, 1 / p}; +} + + /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayL2Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } +FunctionPtr createFunctionArrayLpNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 2c29db81dd6..ee271a67f07 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -810,12 +810,14 @@ public: const auto & p_column = arguments[1]; - const auto * p_column_const = assert_cast(p_column.column.get()); + if (!isColumnConst(*p_column.column) && p_column.column->size() != 1) + throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; + double p; - if (isFloat(p_column_const->getDataType())) - p = p_column_const->getFloat64(0); - else if (isUnsignedInteger(p_column_const->getDataType())) - p = p_column_const->getUInt(0); + if (isFloat(p_column.column->getDataType())) + p = p_column.column->getFloat64(0); + else if (isUnsignedInteger(p_column.column->getDataType())) + p = p_column.column->getUInt(0); else throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; @@ -1109,10 +1111,12 @@ private: extern FunctionPtr createFunctionArrayL1Norm(ContextPtr context_); extern FunctionPtr createFunctionArrayL2Norm(ContextPtr context_); +extern FunctionPtr createFunctionArrayLpNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayL1Distance(ContextPtr context_); extern FunctionPtr createFunctionArrayL2Distance(ContextPtr context_); +extern FunctionPtr createFunctionArrayLpDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_); @@ -1132,6 +1136,14 @@ struct L2NormTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Norm; }; +struct LpNormTraits +{ + static inline String name = "LpNorm"; + + static constexpr auto CreateTupleFunction = FunctionLpNorm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLpNorm; +}; + struct LinfNormTraits 
{ static inline String name = "LinfNorm"; @@ -1156,6 +1168,14 @@ struct L2DistanceTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Distance; }; +struct LpDistanceTraits +{ + static inline String name = "LpDistance"; + + static constexpr auto CreateTupleFunction = FunctionLpDistance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLpDistance; +}; + struct LinfDistanceTraits { static inline String name = "LinfDistance"; @@ -1174,10 +1194,12 @@ struct CosineDistanceTraits using TupleOrArrayFunctionL1Norm = TupleOrArrayFunction; using TupleOrArrayFunctionL2Norm = TupleOrArrayFunction; +using TupleOrArrayFunctionLpNorm = TupleOrArrayFunction; using TupleOrArrayFunctionLinfNorm = TupleOrArrayFunction; using TupleOrArrayFunctionL1Distance = TupleOrArrayFunction; using TupleOrArrayFunctionL2Distance = TupleOrArrayFunction; +using TupleOrArrayFunctionLpDistance = TupleOrArrayFunction; using TupleOrArrayFunctionLinfDistance = TupleOrArrayFunction; using TupleOrArrayFunctionCosineDistance = TupleOrArrayFunction; @@ -1200,7 +1222,7 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normL2", TupleOrArrayFunctionL2Norm::name, FunctionFactory::CaseInsensitive); @@ -1210,7 +1232,7 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::CaseInsensitive); factory.registerAlias("distanceL2", FunctionL2Distance::name, FunctionFactory::CaseInsensitive); diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index b7db2dceee8..ebce2788fe9 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -1,5 +1,6 @@ 6 3.7416573867739413 +3.2071843327373397 3 0.00258509695694209 \N @@ -11,6 +12,9 @@ nan 7.0710678118654755 9.16515138991168 12.12435565298214 +5.917593844525055 +8.308858759453505 +9.932246380845738 2 5 4 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 246b16daf65..75e4b0d653e 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -1,5 +1,6 @@ SELECT L1Distance([0, 0, 0], [1, 2, 3]); SELECT L2Distance([1, 2, 3], [0, 0, 0]); +SELECT LpDistance([1, 2, 3], [0, 0, 0], 3.5); SELECT LinfDistance([1, 2, 3], [0, 0, 0]); SELECT cosineDistance([1, 2, 3], [3, 5, 7]); @@ -26,6 +27,7 @@ CREATE TABLE vec2d (id UInt64, v Array(Float64)) ENGINE = Memory; INSERT INTO vec1 VALUES (1, [3, 4, 5]), (2, [2, 4, 8]), (3, [7, 7, 7]); SELECT L1Distance(v, [0, 0, 0]) FROM vec1; SELECT L2Distance(v, [0, 0, 0]) FROM vec1; +SELECT LpDistance(v, [0, 0, 0], 3.14) FROM vec1; SELECT LinfDistance([5, 4, 3], v) FROM vec1; SELECT cosineDistance([3, 2, 1], v) FROM vec1; SELECT LinfDistance(v, materialize([0, -2, 0])) FROM vec1; @@ -42,6 +44,10 @@ SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; SELECT L1Distance([0, 0], [1]); -- { serverError 190 } SELECT L2Distance([1, 2], (3,4)); -- { 
serverError 43 } +SELECT LpDistance([1, 2], [3,4]); -- { serverError 42 } +SELECT LpDistance([1, 2], [3,4], -1.); -- { serverError 69 } +SELECT LpDistance([1, 2], [3,4], 'aaa'); -- { serverError 43 } +SELECT LpDistance([1, 2], [3,4], materialize(2.7)); -- { serverError 44 } DROP TABLE vec1; DROP TABLE vec2; diff --git a/tests/queries/0_stateless/02283_array_norm.reference b/tests/queries/0_stateless/02283_array_norm.reference index 68dbce0b436..ebaadee321f 100644 --- a/tests/queries/0_stateless/02283_array_norm.reference +++ b/tests/queries/0_stateless/02283_array_norm.reference @@ -1,27 +1,28 @@ 6 7.0710678118654755 +10.882246697870885 2 -10803059573 4234902446.7343364 2096941042 -1 5 -2 2 -3 5.196152422706632 -4 0 +10803059573 4234902446.7343364 10803059573 4234902446.7343364 3122003357.3280888 2096941042 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 4 11 -1 5 -2 2 -3 5.196152422706632 -4 0 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 4 11 -1 5 -2 2 -3 5.196152422706632 -4 0 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql index 8408eea3f8b..6938618d633 100644 --- a/tests/queries/0_stateless/02283_array_norm.sql +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -1,5 +1,6 @@ SELECT L1Norm([1, 2, 3]); SELECT L2Norm([3., 4., 5.]); +SELECT LpNorm([3., 4., 5.], 1.1); SELECT LinfNorm([0, 0, 2]); -- Overflows @@ -7,6 +8,9 @@ WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -57341255 SELECT L1Norm(a), L2Norm(a), + LpNorm(a,1), + LpNorm(a,2), + LpNorm(a,3.14), LinfNorm(a); DROP TABLE IF EXISTS vec1; @@ -19,17 +23,23 @@ INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1f VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); -SELECT id, L2Norm(v) FROM vec1; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1; -SELECT id, L2Norm(v) FROM vec1f; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1f; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1f; -SELECT id, L2Norm(v) FROM vec1d; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1d; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1d; SELECT L1Norm(1, 2); -- { serverError 42 } +SELECT LpNorm([1,2]); -- { serverError 42 } +SELECT LpNorm([1,2], -3.4); -- { serverError 69 } +SELECT LpNorm([1,2], 'aa'); -- { serverError 43 } +SELECT LpNorm([1,2], [1]); -- { serverError 43 } +SELECT LpNorm([1,2], materialize(3.14)); -- { serverError 44 } + DROP TABLE vec1; DROP TABLE vec1f; DROP TABLE vec1d; From 8099361cbc7ced80c6e2d72e88d2fab3d80795d6 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 27 May 2022 17:48:14 +0200 Subject: [PATCH 109/150] Update FileCache.cpp --- src/Common/FileCache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 6c76bf5c0b3..efb2f29d274 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -1053,7 +1053,8 @@ void LRUFileCache::assertCacheCellsCorrectness( if (file_segment->reserved_size != 0) { assert(cell.queue_iterator); - 
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock)); + /// FIXME: this is too slow, need to make it O(1) + /// assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock)); } } } From 6361c5f38c9893345d10b2e9a4cd27aecc335777 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 18:22:16 +0200 Subject: [PATCH 110/150] Fix for failed style check --- src/Functions/array/arrayDistance.cpp | 1 + src/Functions/array/arrayNorm.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 2121189dacb..d5359572437 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; } struct L1Distance diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 20807b4a487..805368be5ee 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; + extern const int ARGUMENT_OUT_OF_BOUND; } struct L1Norm From 9b1b30855c6513dd49bb9ab53c48f21f54537c5e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 18:25:11 +0200 Subject: [PATCH 111/150] Fixed check for HUGE_VAL --- src/Functions/array/arrayDistance.cpp | 2 +- src/Functions/array/arrayNorm.cpp | 2 +- src/Functions/vectorFunctions.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index d5359572437..3f7900b6c62 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -418,7 +418,7 @@ LpDistance::ConstParams FunctionArrayDistance::initConstParams(const getName()); Float64 p = arguments[2].column->getFloat64(0); - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument for function {} must be not less than one and not be an infinity", diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 805368be5ee..2142abc4c90 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -252,7 +252,7 @@ LpNorm::ConstParams FunctionArrayNorm::initConstParams(const ColumnsWith getName()); Float64 p = arguments[1].column->getFloat64(0); - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument for function {} must be not less than one and not be an infinity", diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index ee271a67f07..411b30040cc 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -821,7 +821,7 @@ public: else throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception{"Second argument for function " + getName() + " must be not less than one and not be an infinity", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; auto abs = 
FunctionFactory::instance().get("abs", context); From a061acadbec41568eee13d7548a1b3b197e13dac Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 27 May 2022 11:04:29 -0700 Subject: [PATCH 112/150] Remove std::move from trivially-copyable object --- src/Dictionaries/FlatDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index d0d9fba763c..bd664224d41 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -643,7 +643,7 @@ void registerDictionaryFlat(DictionaryFactory & factory) const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), std::move(configuration)); + return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration); }; factory.registerLayout("flat", create_layout, false); From fa31d758d6603e5187f187930c14572fb155becc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 22:22:59 +0200 Subject: [PATCH 113/150] Update tests --- .../0_stateless/00189_time_zones_long.reference | 12 ++++++------ .../0_stateless/00735_long_conditional.reference | 4 ++-- .../0_stateless/00900_long_parquet_load.reference | 4 ++-- .../0_stateless/01098_msgpack_format.reference | 4 ++-- .../0_stateless/01307_orc_output_format.reference | 4 ++-- .../0_stateless/01905_to_json_string.reference | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00189_time_zones_long.reference b/tests/queries/0_stateless/00189_time_zones_long.reference index c55542f59a6..e53ec7ca815 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.reference +++ b/tests/queries/0_stateless/00189_time_zones_long.reference @@ -10,7 +10,7 @@ toStartOfDay 2014-09-30 00:00:00 2014-09-30 00:00:00 toMonday -2014-12-29 +2014-12-22 2014-12-22 2014-12-22 2014-12-29 @@ -54,7 +54,7 @@ toStartOfYear 2014-01-01 2014-01-01 toTime -1970-01-02 12:00:00 1970-01-02 12:00:00 +1970-01-02 11:00:00 1970-01-02 12:00:00 1970-01-02 10:00:00 1970-01-02 11:00:00 1970-01-02 09:00:00 1970-01-02 10:00:00 1970-01-02 18:00:00 1970-01-02 18:00:00 @@ -84,7 +84,7 @@ toDayOfWeek 3 2 toHour -23 +22 21 20 4 @@ -236,10 +236,10 @@ toString 2015-07-15 02:30:00 toUnixTimestamp 1426415400 +1426419000 1426422600 -1426426200 -1426393800 -1426455000 +1426390200 +1426451400 1426415400 1426415400 1426415400 diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index f6c06e64066..05383d6e1f0 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -94,7 +94,7 @@ value vs value 1970-01-01 1970-01-02 1970-01-02 Date Date Date 2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +1970-01-01 02:00:00 1970-01-01 02:00:01 1970-01-01 02:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') column vs value 0 1 1 Int8 Int8 Int8 0 1 1 Int8 Int16 Int16 @@ -191,4 +191,4 @@ column vs value 1970-01-01 1970-01-02 1970-01-02 Date Date Date 2000-01-01 2000-01-01 00:00:01 
2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +1970-01-01 02:00:00 1970-01-01 02:00:01 1970-01-01 02:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index b295a226853..72ec99ad2c6 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -3,8 +3,8 @@ 1 0 1 1 1 10 1.1 10.1 01/01/09 1 1230768060 === Try load data from alltypes_list.parquet [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] -[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] -[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] +[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['1999-12-31 23:00:00','2000-12-31 23:00:00','2001-12-31 23:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] +[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['1999-12-31 23:00:00','2000-12-31 23:00:00','2001-12-31 23:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] === Try load data from alltypes_plain.parquet 4 1 0 0 0 0 0 0 03/01/09 0 1235865600 5 0 1 1 1 10 1.1 10.1 03/01/09 1 1235865660 diff --git a/tests/queries/0_stateless/01098_msgpack_format.reference b/tests/queries/0_stateless/01098_msgpack_format.reference index 384852f24a7..cfe3501cb88 100644 --- a/tests/queries/0_stateless/01098_msgpack_format.reference +++ b/tests/queries/0_stateless/01098_msgpack_format.reference @@ -1,9 +1,9 @@ 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] 4 1234 3244467295 500000000000 -1 -256 -14741221 
-7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 02:00:42 1970-01-01 02:00:00.042 [42] 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 02:00:42 1970-01-01 02:00:00.042 [42] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [0,1,2,3,42,253,254,255] diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference index e185c02a3e5..657d28b3093 100644 --- a/tests/queries/0_stateless/01307_orc_output_format.reference +++ b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -1,6 +1,6 @@ 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.00000001 100000.00000000000001 1 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.123123123 123123123.123123123123123 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.42 42.42424242 424242.42424242424242 42 +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 02:00:42 42.42 42.42424242 424242.42424242424242 42 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.00000001 100000.00000000000001 1 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.123123123 123123123.123123123123123 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.42 42.42424242 424242.42424242424242 42 +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 02:00:42 42.42 42.42424242 424242.42424242424242 42 diff --git a/tests/queries/0_stateless/01905_to_json_string.reference b/tests/queries/0_stateless/01905_to_json_string.reference index e669022f208..33d435f8e1a 100644 --- a/tests/queries/0_stateless/01905_to_json_string.reference +++ b/tests/queries/0_stateless/01905_to_json_string.reference @@ -1,3 +1,3 @@ -[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 07:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "O" [] +[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 06:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "O" [] [-115] 481807067 ",{MM" -170235.0663 "o" 3.3808659558052087e155 ["2055-01-12","2070-08-09 03:49:21","2068-11-30 09:36:49.672","20b0e7b5-ad0e-177b-3054-c779b2a8ebe0"] "I\\u001C" ["e57178f9-4d10-2fa1-7c2d-53c5a65c3463"] {"1234":"5678"} From 10c97164677843da83c540c846d08b5948593f9f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova 
<54203879+kssenii@users.noreply.github.com> Date: Fri, 27 May 2022 22:48:07 +0200 Subject: [PATCH 114/150] Fix clang-tidy --- src/Common/FileCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index f66287b805f..3962679770b 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -301,7 +301,7 @@ private: size_t getFileSegmentsNumUnlocked(std::lock_guard & cache_lock) const; - void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard & cache_lock); + static void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard & cache_lock); public: String dumpStructure(const Key & key_) override; From c50791dd3babb806626fe2229364f8161b787600 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 22:51:37 +0200 Subject: [PATCH 115/150] Fix clang-tidy-14, part 1 --- src/Client/ClientBase.cpp | 7 ++++--- src/Common/ThreadFuzzer.cpp | 8 ++++++-- src/Common/filesystemHelpers.cpp | 3 +-- src/Daemon/BaseDaemon.cpp | 8 ++++---- src/Databases/MySQL/DatabaseMySQL.cpp | 16 ++++++++-------- src/Dictionaries/MySQLDictionarySource.cpp | 16 +++++++--------- src/Dictionaries/RangeHashedDictionary.cpp | 8 ++++---- src/Formats/FormatFactory.cpp | 5 ++--- src/Functions/CRC.cpp | 2 +- src/Functions/FunctionsBinaryRepresentation.cpp | 2 +- src/Functions/FunctionsJSON.cpp | 2 +- src/Functions/URL/port.cpp | 3 +-- src/Functions/pointInPolygon.cpp | 2 +- src/Interpreters/DatabaseCatalog.cpp | 2 +- .../TranslateQualifiedNamesVisitor.cpp | 2 +- src/Processors/Executors/PollingQueue.cpp | 2 +- src/Server/MySQLHandlerFactory.cpp | 11 +++++++++-- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 4 ++-- 20 files changed, 57 insertions(+), 50 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9cc31df0b43..d678441d442 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -285,11 +285,11 @@ void ClientBase::setupSignalHandler() sigemptyset(&new_act.sa_mask); #else if (sigemptyset(&new_act.sa_mask)) - throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); #endif if (sigaction(SIGINT, &new_act, nullptr)) - throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); } @@ -492,7 +492,8 @@ try String pager = config().getString("pager", ""); if (!pager.empty()) { - signal(SIGPIPE, SIG_IGN); + if (SIG_ERR == signal(SIGPIPE, SIG_IGN)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); ShellCommand::Config config(pager); config.pipe_stdin_only = true; diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 9d07edeb502..962cfee074d 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -1,3 +1,5 @@ +// NOLINTBEGIN(readability-inconsistent-declaration-parameter-name) + #include #include #if defined(OS_LINUX) @@ -292,8 +294,8 @@ void ThreadFuzzer::setup() const #if THREAD_FUZZER_WRAP_PTHREAD # define MAKE_WRAPPER(RET, NAME, ...) 
\ - extern "C" RET __##NAME(__VA_ARGS__); /* NOLINT */ \ - extern "C" RET NAME(__VA_ARGS__) /* NOLINT */ \ + extern "C" RET __##NAME(__VA_ARGS__); \ + extern "C" RET NAME(__VA_ARGS__) \ { \ injection( \ NAME##_before_yield_probability.load(std::memory_order_relaxed), \ @@ -317,3 +319,5 @@ FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER) # undef MAKE_WRAPPER #endif } + +// NOLINTEND(readability-inconsistent-declaration-parameter-name) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index ca06b21ab3a..6d9e0859692 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -1,10 +1,8 @@ #include "filesystemHelpers.h" -#include #if defined(__linux__) # include # include -# include # include #endif #include @@ -13,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f7cfbab289a..c5341fb0ac1 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -76,7 +76,8 @@ DB::PipeFDs signal_pipe; */ static void call_default_signal_handler(int sig) { - signal(sig, SIG_DFL); + if (SIG_ERR == signal(sig, SIG_DFL)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -498,9 +499,8 @@ BaseDaemon::~BaseDaemon() signal_listener_thread.join(); /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. for (int sig : handled_signals) - { - signal(sig, SIG_DFL); - } + if (SIG_ERR == signal(sig, SIG_DFL)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 279867542e2..446518be5cd 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -89,7 +89,7 @@ bool DatabaseMySQL::empty() const return true; for (const auto & [table_name, storage_info] : local_tables_cache) - if (!remove_or_detach_tables.count(table_name)) + if (!remove_or_detach_tables.contains(table_name)) return false; return true; @@ -103,7 +103,7 @@ DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_cont fetchTablesIntoLocalCache(local_context); for (const auto & [table_name, modify_time_and_storage] : local_tables_cache) - if (!remove_or_detach_tables.count(table_name) && (!filter_by_table_name || filter_by_table_name(table_name))) + if (!remove_or_detach_tables.contains(table_name) && (!filter_by_table_name || filter_by_table_name(table_name))) tables[table_name] = modify_time_and_storage.second; return std::make_unique(tables, database_name); @@ -120,7 +120,7 @@ StoragePtr DatabaseMySQL::tryGetTable(const String & mysql_table_name, ContextPt fetchTablesIntoLocalCache(local_context); - if (!remove_or_detach_tables.count(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end()) + if (!remove_or_detach_tables.contains(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end()) return local_tables_cache[mysql_table_name].second; return StoragePtr{}; @@ -349,11 +349,11 @@ void DatabaseMySQL::attachTable(ContextPtr /* context_ */, const String & table_ { std::lock_guard lock{mutex}; - if (!local_tables_cache.count(table_name)) + if (!local_tables_cache.contains(table_name)) throw Exception("Cannot attach table " + backQuoteIfNeed(database_name) + "." 
+ backQuoteIfNeed(table_name) + " because it does not exist.", ErrorCodes::UNKNOWN_TABLE); - if (!remove_or_detach_tables.count(table_name)) + if (!remove_or_detach_tables.contains(table_name)) throw Exception("Cannot attach table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " because it already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); @@ -372,11 +372,11 @@ StoragePtr DatabaseMySQL::detachTable(ContextPtr /* context */, const String & t { std::lock_guard lock{mutex}; - if (remove_or_detach_tables.count(table_name)) + if (remove_or_detach_tables.contains(table_name)) throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " is dropped", ErrorCodes::TABLE_IS_DROPPED); - if (!local_tables_cache.count(table_name)) + if (!local_tables_cache.contains(table_name)) throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); @@ -412,7 +412,7 @@ void DatabaseMySQL::detachTablePermanently(ContextPtr, const String & table_name fs::path remove_flag = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix); - if (remove_or_detach_tables.count(table_name)) + if (remove_or_detach_tables.contains(table_name)) throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); if (fs::exists(remove_flag)) diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 57d61ce5724..22ca5a5b08c 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -16,6 +16,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include "readInvalidateQuery.h" namespace DB @@ -118,15 +125,6 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) #if USE_MYSQL -# include -# include -# include -# include -# include -# include -# include "readInvalidateQuery.h" -# include -# include namespace DB { diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 20230b1bd32..261e9166ec8 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -104,7 +104,7 @@ ColumnPtr RangeHashedDictionary::getColumn( /// Cast range column to storage type Columns modified_key_columns = key_columns; - auto range_storage_column = key_columns.back(); + const ColumnPtr & range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); @@ -314,7 +314,7 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Colum } /// Cast range column to storage type - auto range_storage_column = key_columns.back(); + const ColumnPtr & range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; auto range_column_updated = castColumnAccurate(column_to_cast, dict_struct.range_min->type); auto key_columns_copy = key_columns; @@ -513,7 +513,7 @@ void RangeHashedDictionary::getItemsImpl( size_t keys_found = 0; - auto range_column = key_columns.back(); + const ColumnPtr & range_column = key_columns.back(); auto key_columns_copy = key_columns; key_columns_copy.pop_back(); @@ -984,7 +984,7 @@ Pipe 
RangeHashedDictionary::read(const Names & column_names Columns result; result.reserve(attribute_names_size); - auto key_column = key_columns.back(); + const ColumnPtr & key_column = key_columns.back(); const auto * key_to_index_column = typeid_cast(key_column.get()); if (!key_to_index_column) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 644e4d3ecfd..6a7eb88bca1 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -492,10 +492,9 @@ String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_ String FormatFactory::getFormatFromFileDescriptor(int fd) { #ifdef OS_LINUX - char buf[32] = {'\0'}; - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd); + std::string proc_path = fmt::format("/proc/self/fd/{}", fd); char file_path[PATH_MAX] = {'\0'}; - if (readlink(buf, file_path, sizeof(file_path) - 1) != -1) + if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) return getFormatFromFileName(file_path, false); return ""; #elif defined(__APPLE__) diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index abcf137f2e7..b7c6c1195ea 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -118,7 +118,7 @@ struct CRCFunctionWrapper private: static ReturnType doCRC(const ColumnString::Chars & buf, size_t offset, size_t size) { - const unsigned char * p = reinterpret_cast(&buf[0]) + offset; + const unsigned char * p = reinterpret_cast(buf.data()) + offset; return Impl::makeCRC(p, size); } }; diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 582dd1f1049..a13558133d3 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -91,7 +91,7 @@ struct HexImpl out_vec.resize(size * hex_length); size_t pos = 0; - char * out = reinterpret_cast(&out_vec[0]); + char * out = reinterpret_cast(out_vec.data()); for (size_t i = 0; i < size; ++i) { const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index d2bcb646ecf..49546aac92b 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -123,7 +123,7 @@ public: bool document_ok = false; if (col_json_const) { - std::string_view json{reinterpret_cast(&chars[0]), offsets[0] - 1}; + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; document_ok = parser.parse(json, document); } diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index afe27c9240c..e2dbc75ab3f 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -91,7 +91,7 @@ private: static UInt16 extractPort(UInt16 default_port, const ColumnString::Chars & buf, size_t offset, size_t size) { - const char * p = reinterpret_cast(&buf[0]) + offset; + const char * p = reinterpret_cast(buf.data()) + offset; const char * end = p + size; StringRef host = getURLHost(p, size); @@ -127,4 +127,3 @@ void registerFunctionPort(FunctionFactory & factory) } } - diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index c3a9c411cbc..7d2369fd5e7 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -167,7 +167,7 @@ public: const auto & tuple_columns = tuple_col->getColumns(); - const ColumnWithTypeAndName poly = arguments[1]; + const ColumnWithTypeAndName & poly = arguments[1]; const IColumn * poly_col = poly.column.get(); const ColumnConst * const_poly_col = 
checkAndGetColumn(poly_col); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 267564eb84c..2589df0986a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -647,7 +647,7 @@ std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForData { std::unique_lock lock(ddl_guards_mutex); db_guard_iter = ddl_guards.try_emplace(database).first; - assert(db_guard_iter->second.first.count("")); + assert(db_guard_iter->second.first.contains("")); } DatabaseGuard & db_guard = db_guard_iter->second; return std::unique_lock{db_guard.second}; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 35fb0828b3e..3129f9d7fe2 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -253,7 +253,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { for (const auto & column : table.columns) { - if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name))) + if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.contains(column.name))) { addIdentifier(columns, table.table, column.name); } diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp index a601d426a5d..270f495a2f0 100644 --- a/src/Processors/Executors/PollingQueue.cpp +++ b/src/Processors/Executors/PollingQueue.cpp @@ -38,7 +38,7 @@ PollingQueue::~PollingQueue() void PollingQueue::addTask(size_t thread_number, void * data, int fd) { std::uintptr_t key = reinterpret_cast(data); - if (tasks.count(key)) + if (tasks.contains(key)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Task {} was already added to task queue", key); tasks[key] = TaskData{thread_number, data, fd}; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index 7263b234068..c02a3015945 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; extern const int NO_ELEMENTS_IN_CONFIG; extern const int OPENSSL_ERROR; } @@ -66,7 +67,10 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(certificate_file.data(), "r"); if (fp == nullptr) throw Exception("Cannot open certificate file: " + certificate_file + ".", ErrorCodes::CANNOT_OPEN_FILE); - SCOPE_EXIT(fclose(fp)); + SCOPE_EXIT( + if (0 != fclose(fp)) + throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); + ); X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); SCOPE_EXIT(X509_free(x509)); @@ -89,7 +93,10 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(private_key_file.data(), "r"); if (fp == nullptr) throw Exception ("Cannot open private key file " + private_key_file + ".", ErrorCodes::CANNOT_OPEN_FILE); - SCOPE_EXIT(fclose(fp)); + SCOPE_EXIT( + if (0 != fclose(fp)) + throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); + ); private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); if (!private_key) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index be1c9ffd370..ca258da7b5d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3072,7 +3072,7 @@ void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. - DataPartPtr part = *it_part; + const DataPartPtr & part = *it_part; if (part->getState() == DataPartState::Active) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a8e4854f809..1866d65ccfd 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -840,7 +840,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { - Tokens tokens(&indices[0], &indices[indices.size()], settings.max_query_size); + Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size); IParser::Pos pos(tokens, settings.max_parser_depth); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index a19e3a778cd..e7882ce4952 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -319,12 +319,12 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) size_t columns_size; in >> columns_size >> "\n"; columns_str.resize(columns_size); - in.readStrict(&columns_str[0], columns_size); + in.readStrict(columns_str.data(), columns_size); in >> "\nmetadata_str_size:\n"; size_t metadata_size; in >> metadata_size >> "\n"; metadata_str.resize(metadata_size); - in.readStrict(&metadata_str[0], metadata_size); + in.readStrict(metadata_str.data(), metadata_size); } else if (type_str == "sync_pinned_part_uuids") { From 6c2699a991e0535f6962b322d22d2c0182ccd925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:00:23 +0200 Subject: [PATCH 116/150] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index c5341fb0ac1..2bf699da9c7 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -77,7 +77,7 @@ DB::PipeFDs signal_pipe; static void call_default_signal_handler(int sig) { if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -500,7 +500,7 @@ BaseDaemon::~BaseDaemon() /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. 
for (int sig : handled_signals) if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } From d62c57be3fafdc9d9b12073e5f63ea69db6ff46b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:02:05 +0200 Subject: [PATCH 117/150] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 2bf699da9c7..bb36e0eb1ea 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -68,6 +68,14 @@ namespace fs = std::filesystem; +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_SET_SIGNAL_HANDLER; + } +} + DB::PipeFDs signal_pipe; From d6597efc08ffac90d9e89cbda2b3e86d7e84dad4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:03:16 +0200 Subject: [PATCH 118/150] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bb36e0eb1ea..1b1e4611dc2 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -85,7 +85,7 @@ DB::PipeFDs signal_pipe; static void call_default_signal_handler(int sig) { if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -508,7 +508,7 @@ BaseDaemon::~BaseDaemon() /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. 
for (int sig : handled_signals) if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } From f3e83cb222a9c155ac32d5601c7b4198c14004c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 01:51:41 +0300 Subject: [PATCH 119/150] Update star-schema.md --- docs/en/getting-started/example-datasets/star-schema.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/getting-started/example-datasets/star-schema.md b/docs/en/getting-started/example-datasets/star-schema.md index 35ff492c360..ea855a664a9 100644 --- a/docs/en/getting-started/example-datasets/star-schema.md +++ b/docs/en/getting-started/example-datasets/star-schema.md @@ -26,7 +26,6 @@ $ ./dbgen -s 1000 -T c $ ./dbgen -s 1000 -T l $ ./dbgen -s 1000 -T p $ ./dbgen -s 1000 -T s -$ ./dbgen -s 1000 -T d ``` Creating tables in ClickHouse: From 39a55991ca49d6e5c809a38a490184c8b92b98fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 01:18:07 +0200 Subject: [PATCH 120/150] Change Playground URL in the docs --- docs/en/development/contrib.md | 2 +- docs/en/getting-started/example-datasets/brown-benchmark.md | 2 +- docs/en/getting-started/example-datasets/cell-towers.md | 4 ++-- docs/en/getting-started/example-datasets/menus.md | 2 +- docs/en/getting-started/example-datasets/ontime.md | 2 +- docs/en/getting-started/example-datasets/opensky.md | 2 +- docs/en/getting-started/example-datasets/recipes.md | 2 +- docs/en/getting-started/example-datasets/uk-price-paid.md | 2 +- docs/ru/development/contrib.md | 2 +- docs/ru/getting-started/example-datasets/brown-benchmark.md | 3 +-- docs/ru/getting-started/example-datasets/cell-towers.md | 2 +- docs/ru/getting-started/example-datasets/recipes.md | 2 +- 12 files changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 0a254f8c8ae..21ec7cf635b 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -92,7 +92,7 @@ The list of third-party libraries can be obtained by the following query: SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -[Example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries} diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index 0960756dbe9..b8e6140c60f 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -411,6 +411,6 @@ ORDER BY yr, mo; ``` -The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.com/play?user=play), 
[example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). +The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). [Original article](https://clickhouse.com/docs/en/getting_started/example_datasets/brown-benchmark/) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 7a35a28faa6..8da7761eea4 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -126,6 +126,6 @@ SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.com/play?user=play), [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). +The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). -Although you cannot create temporary tables there. \ No newline at end of file +Although you cannot create temporary tables there. diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index c572dcdb491..fd20c75f707 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -351,4 +351,4 @@ At least they have caviar with vodka. Very nice. 
## Online Playground {#playground} -The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==). +The data is uploaded to ClickHouse Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==). diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index aa181a7deff..4b24d8fd6e7 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -398,7 +398,7 @@ ORDER BY c DESC LIMIT 10; ``` -You can also play with the data in Playground, [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). +You can also play with the data in Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). This performance test was created by Vadim Tkachenko. See: diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index f55ebc79590..b38021c34eb 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -417,4 +417,4 @@ Result: ### Online Playground {#playground} -You can test other queries to this data set using the interactive resource [Online Playground](https://gh-api.clickhouse.com/play?user=play). For example, [like this](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. +You can test other queries to this data set using the interactive resource [Online Playground](https://play.clickhouse.com/play?user=play). For example, [like this](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. 
diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index 5b10c7c9c2c..37a6eeebea5 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -334,6 +334,6 @@ Result: ### Online Playground -The dataset is also available in the [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). +The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). [Original article](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/) diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index eaec6e53ed4..b7a486fb057 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -646,4 +646,4 @@ no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows ### Test It in Playground {#playground} -The dataset is also available in the [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). +The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). 
diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index b98ed847a0b..1b99ec97553 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -92,7 +92,7 @@ sidebar_label: "Используемые сторонние библиотеки SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -[Пример](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений {#adding-third-party-libraries} diff --git a/docs/ru/getting-started/example-datasets/brown-benchmark.md b/docs/ru/getting-started/example-datasets/brown-benchmark.md index 8d2605f4a9f..8afda860b72 100644 --- a/docs/ru/getting-started/example-datasets/brown-benchmark.md +++ b/docs/ru/getting-started/example-datasets/brown-benchmark.md @@ -411,5 +411,4 @@ ORDER BY yr, mo; ``` -Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.com/play?user=play), [пример](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). - +Данные также доступны для работы с интерактивными запросами через [Playground](https://play.clickhouse.com/play?user=play), [пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). 
diff --git a/docs/ru/getting-started/example-datasets/cell-towers.md b/docs/ru/getting-started/example-datasets/cell-towers.md index 254d53ad7e1..49174994c14 100644 --- a/docs/ru/getting-started/example-datasets/cell-towers.md +++ b/docs/ru/getting-started/example-datasets/cell-towers.md @@ -125,4 +125,4 @@ SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://gh-api.clickhouse.com/play?user=play). Например, [вот так](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. +Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://play.clickhouse.com/play?user=play). Например, [вот так](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. diff --git a/docs/ru/getting-started/example-datasets/recipes.md b/docs/ru/getting-started/example-datasets/recipes.md index 08838f1c950..f3b4c8285d7 100644 --- a/docs/ru/getting-started/example-datasets/recipes.md +++ b/docs/ru/getting-started/example-datasets/recipes.md @@ -337,6 +337,6 @@ WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'; ### Online Playground -Этот набор данных доступен в [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). +Этот набор данных доступен в [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). [Оригинальная статья](https://clickhouse.com/docs/ru/getting-started/example-datasets/recipes/) From f7a5b1fdafc3f83106f159d56e0d2677d7b02d2d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 03:24:56 +0200 Subject: [PATCH 121/150] Remove margin in test reports and change the font --- docker/test/fuzzer/run-fuzzer.sh | 18 ++---------------- tests/ci/report.py | 17 ++--------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 32799a669eb..f186cf46adf 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -355,22 +355,9 @@ fi cat > report.html < -