Merge branch 'master' of github.com:yandex/ClickHouse

2024-11-22 15:42:02 +00:00 · 2020-03-26 13:33:11 +03:00 · 2020-03-26 13:33:11 +03:00 · 656234ab5c
commit 656234ab5c
parent 7691043df9 f0d8c8a1f6
18 changed files with 230 additions and 60 deletions
--- a/dbms/src/Functions/FunctionBinaryArithmetic.h
+++ b/dbms/src/Functions/FunctionBinaryArithmetic.h
@ -833,6 +833,19 @@ public:
                        type_res = std::make_shared<LeftDataType>(left.getPrecision(), left.getScale());
                    else if constexpr (IsDataTypeDecimal<RightDataType>)
                        type_res = std::make_shared<RightDataType>(right.getPrecision(), right.getScale());
+                    else if constexpr (std::is_same_v<ResultDataType, DataTypeDateTime>)
+                    {
+                        // Special case for DateTime: binary OPS should reuse timezone
+                        // of DateTime argument as timezone of result type.
+                        // NOTE: binary plus/minus are not allowed on DateTime64, and we are not handling it here.
+
+                        const TimezoneMixin * tz = nullptr;
+                        if constexpr (std::is_same_v<RightDataType, DataTypeDateTime>)
+                                tz = &right;
+                        if constexpr (std::is_same_v<LeftDataType, DataTypeDateTime>)
+                                tz = &left;
+                        type_res = std::make_shared<ResultDataType>(*tz);
+                    }
                    else
                        type_res = std::make_shared<ResultDataType>();
                    return true;
--- a/dbms/src/Functions/extractTimeZoneFromFunctionArguments.cpp
+++ b/dbms/src/Functions/extractTimeZoneFromFunctionArguments.cpp
@ -42,9 +42,9 @@ std::string extractTimeZoneNameFromFunctionArguments(const ColumnsWithTypeAndNam
            return {};

        /// If time zone is attached to an argument of type DateTime.
-        if (const DataTypeDateTime * type = checkAndGetDataType<DataTypeDateTime>(arguments[datetime_arg_num].type.get()))
+        if (const auto * type = checkAndGetDataType<DataTypeDateTime>(arguments[datetime_arg_num].type.get()))
            return type->getTimeZone().getTimeZone();
-        if (const DataTypeDateTime64 * type = checkAndGetDataType<DataTypeDateTime64>(arguments[datetime_arg_num].type.get()))
+        if (const auto * type = checkAndGetDataType<DataTypeDateTime64>(arguments[datetime_arg_num].type.get()))
            return type->getTimeZone().getTimeZone();

        return {};
@ -61,7 +61,9 @@ const DateLUTImpl & extractTimeZoneFromFunctionArguments(Block & block, const Co
            return DateLUT::instance();

        /// If time zone is attached to an argument of type DateTime.
-        if (const DataTypeDateTime * type = checkAndGetDataType<DataTypeDateTime>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
+        if (const auto * type = checkAndGetDataType<DataTypeDateTime>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
+            return type->getTimeZone();
+        if (const auto * type = checkAndGetDataType<DataTypeDateTime64>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
            return type->getTimeZone();

        return DateLUT::instance();
--- a/dbms/src/Functions/formatDateTime.cpp
+++ b/dbms/src/Functions/formatDateTime.cpp
@ -314,7 +314,7 @@ public:
        size_t result_size = pattern_to_fill.size();

        const DateLUTImpl * time_zone_tmp = nullptr;
-        if (arguments.size() == 3)
+        if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
            time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
        else
            time_zone_tmp = &DateLUT::instance();
--- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
+++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
@ -98,8 +98,8 @@ private:
                throw Exception("Distributed table should have an alias when distributed_product_mode set to local.",
                                ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED);

-            database_and_table = createTableIdentifier(database, table);
-            database_and_table->setAlias(alias);
+            auto & identifier = database_and_table->as<ASTIdentifier &>();
+            identifier.resetTable(database, table);
        }
        else
            throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting",
--- a/dbms/src/Interpreters/MutationsInterpreter.cpp
+++ b/dbms/src/Interpreters/MutationsInterpreter.cpp
@ -620,38 +620,23 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
        }
        select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
    }
-    auto metadata = storage->getInMemoryMetadata();
    /// We have to execute select in order of primary key
    /// because we don't sort results additionaly and don't have
    /// any guarantees on data order without ORDER BY. It's almost free, because we
    /// have optimization for data read in primary key order.
-    if (metadata.order_by_ast)
+    if (ASTPtr key_expr = storage->getSortingKeyAST(); key_expr && !key_expr->children.empty())
    {
        ASTPtr dummy;
-
-        ASTPtr key_expr;
-        if (metadata.primary_key_ast)
-            key_expr = metadata.primary_key_ast;
-        else
-            key_expr = metadata.order_by_ast;
-
-        bool empty = false;
-        /// In all other cases we cannot have empty key
-        if (auto key_function = key_expr->as<ASTFunction>())
-            empty = key_function->arguments->children.empty();
-
-        /// Not explicitely spicified empty key
-        if (!empty)
+        auto res = std::make_shared<ASTExpressionList>();
+        for (const auto & key_part : key_expr->children)
        {
            auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);
+            order_by_expr->children.push_back(key_part);

-
-            order_by_expr->children.push_back(key_expr);
-            auto res = std::make_shared<ASTExpressionList>();
            res->children.push_back(order_by_expr);
-
-            select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
        }
+
+        select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
    }

    return select;
--- a/dbms/src/Parsers/ASTIdentifier.cpp
+++ b/dbms/src/Parsers/ASTIdentifier.cpp
@ -101,6 +101,15 @@ void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const
    writeString(name, ostr);
 }

+void ASTIdentifier::resetTable(const String & database_name, const String & table_name)
+{
+    auto ast = createTableIdentifier(database_name, table_name); /// Temporary identifier built for the new database/table pair.
+    auto & ident = ast->as<ASTIdentifier &>();
+    name.swap(ident.name); /// Swap instead of copy: `ast` is local and is not used after this function.
+    name_parts.swap(ident.name_parts);
+    uuid = ident.uuid; /// Only name, name_parts and uuid are replaced; no other member of this node is assigned here.
+}
+
 ASTPtr createTableIdentifier(const String & database_name, const String & table_name)
 {
    assert(database_name != "_temporary_and_external_tables");
--- a/dbms/src/Parsers/ASTIdentifier.h
+++ b/dbms/src/Parsers/ASTIdentifier.h
@ -49,6 +49,8 @@ public:
        return name;
    }

+    void resetTable(const String & database_name, const String & table_name);
+
 protected:
    void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
    void appendColumnNameImpl(WriteBuffer & ostr) const override;
--- a/dbms/tests/msan_suppressions.txt
+++ b/dbms/tests/msan_suppressions.txt
@ -13,3 +13,10 @@ fun:BN_add_word
 fun:bn_div_fixed_top
 fun:bn_mul_words
 fun:BN_cmp
+
+# Suppress some failures in contrib so that we can enable MSan in CI.
+# Ideally, we should report these upstream.
+src:*/contrib/zlib-ng/*
+src:*/contrib/openssl/*
+src:*/contrib/simdjson/*
+
--- a/dbms/tests/queries/0_stateless/00921_datetime64_basic.reference
+++ b/dbms/tests/queries/0_stateless/00921_datetime64_basic.reference
@ -1,3 +1,3 @@
 2019-09-16 19:20:11.000
-2019-05-03 11:25:25.123	2019-05-03	2019-05-02 21:00:00	2019-04-01	1970-01-02 11:25:25	2019-05-03 11:25:00
+2019-05-03 11:25:25.123	2019-05-03	2019-05-03 00:00:00	2019-04-01	1970-01-02 11:25:25	2019-05-03 11:25:00
 2019-09-16 19:20:11.234
--- a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python
+++ b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python
@ -68,13 +68,13 @@ subtractHours(N, 1)
 subtractMinutes(N, 1)
 subtractSeconds(N, 1)
 subtractQuarters(N, 1)
-CAST(N as DateTime)
+CAST(N as DateTime('Europe/Minsk'))
 CAST(N as Date)
 CAST(N as UInt64)
-CAST(N as DateTime64(0))
-CAST(N as DateTime64(3))
-CAST(N as DateTime64(6))
-CAST(N as DateTime64(9))
+CAST(N as DateTime64(0, 'Europe/Minsk'))
+CAST(N as DateTime64(3, 'Europe/Minsk'))
+CAST(N as DateTime64(6, 'Europe/Minsk'))
+CAST(N as DateTime64(9, 'Europe/Minsk'))
 # Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
 # CAST(N as DateTime64(12))
 # DateTime64(18) will always fail due to zero precision, but it is Ok to test here:
--- a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.reference
+++ b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.reference
@ -310,10 +310,10 @@ SELECT subtractQuarters(N, 1)
 "DateTime('Europe/Minsk')","2019-06-16 19:20:11"
 "DateTime64(3, 'Europe/Minsk')","2019-06-16 19:20:11.234"
 ------------------------------------------
-SELECT CAST(N as DateTime)
-"DateTime","2019-09-16 00:00:00"
-"DateTime","2019-09-16 19:20:11"
-"DateTime","2019-09-16 19:20:11"
+SELECT CAST(N as DateTime(\'Europe/Minsk\'))
+"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
+"DateTime('Europe/Minsk')","2019-09-16 19:20:11"
+"DateTime('Europe/Minsk')","2019-09-16 19:20:11"
 ------------------------------------------
 SELECT CAST(N as Date)
 "Date","2019-09-16"
@ -325,25 +325,25 @@ SELECT CAST(N as UInt64)
 "UInt64",1568650811
 "UInt64",1568650811
 ------------------------------------------
-SELECT CAST(N as DateTime64(0))
-"DateTime64(0)","2019-09-16 00:00:00"
-"DateTime64(0)","2019-09-16 19:20:11"
-"DateTime64(0)","2019-09-16 19:20:11"
+SELECT CAST(N as DateTime64(0, \'Europe/Minsk\'))
+"DateTime64(0, 'Europe/Minsk')","2019-09-16 00:00:00"
+"DateTime64(0, 'Europe/Minsk')","2019-09-16 19:20:11"
+"DateTime64(0, 'Europe/Minsk')","2019-09-16 19:20:11"
 ------------------------------------------
-SELECT CAST(N as DateTime64(3))
-"DateTime64(3)","2019-09-16 00:00:00.000"
-"DateTime64(3)","2019-09-16 19:20:11.000"
-"DateTime64(3)","2019-09-16 19:20:11.234"
+SELECT CAST(N as DateTime64(3, \'Europe/Minsk\'))
+"DateTime64(3, 'Europe/Minsk')","2019-09-16 00:00:00.000"
+"DateTime64(3, 'Europe/Minsk')","2019-09-16 19:20:11.000"
+"DateTime64(3, 'Europe/Minsk')","2019-09-16 19:20:11.234"
 ------------------------------------------
-SELECT CAST(N as DateTime64(6))
-"DateTime64(6)","2019-09-16 00:00:00.000000"
-"DateTime64(6)","2019-09-16 19:20:11.000000"
-"DateTime64(6)","2019-09-16 19:20:11.234000"
+SELECT CAST(N as DateTime64(6, \'Europe/Minsk\'))
+"DateTime64(6, 'Europe/Minsk')","2019-09-16 00:00:00.000000"
+"DateTime64(6, 'Europe/Minsk')","2019-09-16 19:20:11.000000"
+"DateTime64(6, 'Europe/Minsk')","2019-09-16 19:20:11.234000"
 ------------------------------------------
-SELECT CAST(N as DateTime64(9))
-"DateTime64(9)","2019-09-16 00:00:00.000000000"
-"DateTime64(9)","2019-09-16 19:20:11.000000000"
-"DateTime64(9)","2019-09-16 19:20:11.234000000"
+SELECT CAST(N as DateTime64(9, \'Europe/Minsk\'))
+"DateTime64(9, 'Europe/Minsk')","2019-09-16 00:00:00.000000000"
+"DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.000000000"
+"DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.234000000"
 ------------------------------------------
 SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\')
 "String","20 16 09/16/19 16 2019-09-16 00 12 259 09 00 AM 00:00 00 00:00:00 1 38 1 19 2019 %"
--- a/dbms/tests/queries/0_stateless/01101_prewhere_after_alter.reference
+++ b/dbms/tests/queries/0_stateless/01101_prewhere_after_alter.reference
@ -0,0 +1 @@
+2a
--- a/dbms/tests/queries/0_stateless/01101_prewhere_after_alter.sql
+++ b/dbms/tests/queries/0_stateless/01101_prewhere_after_alter.sql
@ -0,0 +1,36 @@
+DROP TABLE IF EXISTS test_a;
+DROP TABLE IF EXISTS test_b;
+
+CREATE TABLE test_a
+(
+    OldColumn String DEFAULT '',
+    EventDate Date DEFAULT toDate(EventTime),
+    EventTime DateTime
+) ENGINE = MergeTree(EventDate, EventTime, 8192);
+
+CREATE TABLE test_b
+(
+    OldColumn String DEFAULT '',
+    NewColumn String DEFAULT '',
+    EventDate Date DEFAULT toDate(EventTime),
+    EventTime DateTime
+) ENGINE = MergeTree(EventDate, EventTime, 8192);
+
+INSERT INTO test_a (OldColumn, EventTime) VALUES('1', now());
+
+INSERT INTO test_b (OldColumn, NewColumn, EventTime) VALUES('1', '1a', now());
+INSERT INTO test_b (OldColumn, NewColumn, EventTime) VALUES('2', '2a', now());
+
+ALTER TABLE test_a ADD COLUMN NewColumn String DEFAULT '' AFTER OldColumn;
+
+INSERT INTO test_a (OldColumn, NewColumn, EventTime) VALUES('2', '2a', now());
+
+SELECT NewColumn
+FROM test_a
+INNER JOIN
+(SELECT OldColumn, NewColumn FROM test_b) s
+Using OldColumn
+PREWHERE NewColumn != '';
+
+DROP TABLE test_a;
+DROP TABLE test_b;
--- a/dbms/tests/queries/0_stateless/01102_distributed_local_in_bug.reference
+++ b/dbms/tests/queries/0_stateless/01102_distributed_local_in_bug.reference
--- a/dbms/tests/queries/0_stateless/01102_distributed_local_in_bug.sql
+++ b/dbms/tests/queries/0_stateless/01102_distributed_local_in_bug.sql
@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS hits;
+DROP TABLE IF EXISTS visits;
+DROP TABLE IF EXISTS hits_layer;
+DROP TABLE IF EXISTS visits_layer;
+
+CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate);
+CREATE TABLE hits(EventDate Date, WatchID UInt8) ENGINE MergeTree ORDER BY(EventDate);
+
+CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost,  currentDatabase(), 'visits');
+CREATE TABLE hits_layer(EventDate Date, WatchID UInt8) ENGINE Distributed(test_cluster_two_shards_localhost,  currentDatabase(), 'hits');
+
+SET distributed_product_mode = 'local';
+
+SELECT 0 FROM hits_layer AS hl
+PREWHERE WatchID IN
+(
+    SELECT 0 FROM visits_layer AS vl
+)
+WHERE 0;
+
+DROP TABLE hits;
+DROP TABLE visits;
+DROP TABLE hits_layer;
+DROP TABLE visits_layer;
--- a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference
+++ b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.reference
@ -1 +1,4 @@
 1
+1
+1
+1
--- a/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql
+++ b/dbms/tests/queries/0_stateless/01200_mutations_memory_consumption.sql
@ -1,6 +1,6 @@
-DROP TABLE IF EXISTS table_with_pk;
+DROP TABLE IF EXISTS table_with_single_pk;

-CREATE TABLE table_with_pk
+CREATE TABLE table_with_single_pk
 (
  key UInt8,
  value String
@ -8,9 +8,9 @@ CREATE TABLE table_with_pk
 ENGINE = MergeTree
 ORDER BY key;

-INSERT INTO table_with_pk SELECT number, toString(number % 10) FROM numbers(10000000);
+INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000);

-ALTER TABLE table_with_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;
+ALTER TABLE table_with_single_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;

 SYSTEM FLUSH LOGS;

@ -20,6 +20,92 @@ SELECT
  DISTINCT read_bytes >= peak_memory_usage
 FROM
    system.part_log
-WHERE event_type = 'MutatePart' AND table = 'table_with_pk' AND database = currentDatabase();
+WHERE event_type = 'MutatePart' AND table = 'table_with_single_pk' AND database = currentDatabase();

-DROP TABLE IF EXISTS table_with_pk;
+DROP TABLE IF EXISTS table_with_single_pk;
+
+DROP TABLE IF EXISTS table_with_multi_pk;
+
+CREATE TABLE table_with_multi_pk
+(
+  key1 UInt8,
+  key2 UInt32,
+  key3 DateTime64(6, 'UTC'),
+  value String
+)
+ENGINE = MergeTree
+ORDER BY (key1, key2, key3);
+
+INSERT INTO table_with_multi_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
+
+ALTER TABLE table_with_multi_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
+
+SYSTEM FLUSH LOGS;
+
+-- Memory usage for all mutations must be almost constant and less than
+-- read_bytes.
+SELECT
+  DISTINCT read_bytes >= peak_memory_usage
+  FROM
+      system.part_log
+ WHERE event_type = 'MutatePart' AND table = 'table_with_multi_pk' AND database = currentDatabase();
+
+DROP TABLE IF EXISTS table_with_multi_pk;
+
+
+DROP TABLE IF EXISTS table_with_function_pk;
+
+
+CREATE TABLE table_with_function_pk
+  (
+    key1 UInt8,
+    key2 UInt32,
+    key3 DateTime64(6, 'UTC'),
+    value String
+  )
+ENGINE = MergeTree
+ORDER BY (cast(value as UInt64), key2);
+
+INSERT INTO table_with_function_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
+
+ALTER TABLE table_with_function_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
+
+SYSTEM FLUSH LOGS;
+
+-- Memory usage for all mutations must be almost constant and less than
+-- read_bytes.
+SELECT
+  DISTINCT read_bytes >= peak_memory_usage
+  FROM
+      system.part_log
+ WHERE event_type = 'MutatePart' AND table = 'table_with_function_pk' AND database = currentDatabase();
+
+DROP TABLE IF EXISTS table_with_function_pk;
+
+DROP TABLE IF EXISTS table_without_pk;
+
+CREATE TABLE table_without_pk
+(
+  key1 UInt8,
+  key2 UInt32,
+  key3 DateTime64(6, 'UTC'),
+  value String
+)
+ENGINE = MergeTree
+ORDER BY tuple();
+
+INSERT INTO table_without_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
+
+ALTER TABLE table_without_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
+
+SYSTEM FLUSH LOGS;
+
+-- Memory usage for all mutations must be almost constant and less than
+-- read_bytes.
+SELECT
+  DISTINCT read_bytes >= peak_memory_usage
+  FROM
+      system.part_log
+ WHERE event_type = 'MutatePart' AND table = 'table_without_pk' AND database = currentDatabase();
+
+DROP TABLE IF EXISTS table_without_pk;
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -167,6 +167,8 @@ function get_profiles
    left/clickhouse client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
    right/clickhouse client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0"
    right/clickhouse client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
+    left/clickhouse client --port 9001 --query "system flush logs"
+    right/clickhouse client --port 9002 --query "system flush logs"

    left/clickhouse client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
    left/clickhouse client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
@ -395,7 +397,7 @@ unset IFS

 # Remember that grep sets error code when nothing is found, hence the bayan
 # operator.
-grep -H -m2 'Exception:[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||:
+grep -H -m2 '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||:
 }

 case "$stage" in