Merge branch 'master' of github.com:yandex/ClickHouse

Alexey Milovidov 2019-08-01 01:49:24 +03:00
commit bf524b4419
9 changed files with 152 additions and 10 deletions

View File

@@ -55,7 +55,13 @@ struct DivideIntegralImpl
static inline Result apply(A a, B b)
{
throwIfDivisionLeadsToFPE(a, b);
return a / b;
/// Otherwise overflow may occur due to integer promotion. Example: int8_t(-1) / uint64_t(2).
/// NOTE: overflow is still possible when dividing a large signed number by a large unsigned number, or vice versa. But it's less harmful.
if constexpr (std::is_integral_v<A> && std::is_integral_v<B> && (std::is_signed_v<A> || std::is_signed_v<B>))
return std::make_signed_t<A>(a) / std::make_signed_t<B>(b);
else
return a / b;
}
#if USE_EMBEDDED_COMPILER
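
The promotion hazard the comment describes is easy to reproduce in isolation. A minimal standalone sketch in plain C++ (nothing below is taken from the ClickHouse sources):

#include <cstdint>
#include <iostream>
#include <type_traits>

int main()
{
    int8_t a = -1;
    uint64_t b = 2;

    /// Usual arithmetic conversions promote `a` to uint64_t: -1 becomes
    /// 18446744073709551615, so the quotient is 9223372036854775807, not 0.
    std::cout << a / b << '\n';

    /// Casting both operands to their signed counterparts, as the patch does,
    /// yields the expected 0.
    std::cout << std::make_signed_t<int8_t>(a) / std::make_signed_t<uint64_t>(b) << '\n';
}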

View File

@@ -1,6 +1,9 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionBinaryArithmetic.h>
#include "intDiv.h"
namespace DB
{
@@ -12,7 +15,10 @@ struct DivideIntegralOrZeroImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return unlikely(divisionLeadsToFPE(a, b)) ? 0 : a / b;
if (unlikely(divisionLeadsToFPE(a, b)))
return 0;
return DivideIntegralImpl<A, B>::template apply<Result>(a, b);
}
#if USE_EMBEDDED_COMPILER
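
For context, the FPE this guard avoids comes in two flavors: division by zero, and the minimum signed value divided by -1 (whose true quotient is unrepresentable); both trap with SIGFPE on x86. A hedged sketch of what such a check has to cover — the real divisionLeadsToFPE is defined elsewhere in the sources and may differ in detail:

#include <cstdint>
#include <limits>

bool wouldTrap(int64_t a, int64_t b)
{
    if (b == 0)
        return true;    /// integer division by zero raises SIGFPE
    if (a == std::numeric_limits<int64_t>::min() && b == -1)
        return true;    /// -2^63 / -1 == 2^63 doesn't fit into int64_t
    return false;
}

int main()
{
    /// Both of these would trap if evaluated directly; intDivOrZero returns 0 instead.
    return wouldTrap(1, 0) && wouldTrap(std::numeric_limits<int64_t>::min(), -1) ? 0 : 1;
}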

View File

@@ -35,6 +35,8 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
}
/// Read comment near usage
static constexpr auto DUMMY_COLUMN_NAME = "_dummy";
Names ExpressionAction::getNeededColumns() const
{
@@ -508,11 +510,15 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
if (can_replace && block.has(result_name))
{
auto & result = block.getByName(result_name);
result.type = result_type;
result.column = block.getByName(source_name).column;
const auto & source = block.getByName(source_name);
result.type = source.type;
result.column = source.column;
}
else
block.insert({ block.getByName(source_name).column, result_type, result_name });
{
const auto & source_column = block.getByName(source_name);
block.insert({source_column.column, source_column.type, result_name});
}
break;
}
@@ -927,13 +933,44 @@ void ExpressionActions::finalize(const Names & output_columns)
}
}
/// 1) Sometimes we don't need any columns to perform actions, and sometimes actions don't produce any columns as a result.
/// But the Block class doesn't store any information about its structure itself; it takes that information from its columns.
/// If we remove all columns from the input or output block, we lose the information about the number of rows in it.
/// To avoid this situation we always leave one of the columns in the required (input) columns
/// and in the output columns. We choose that "redundant" column by size, with the help of getSmallestColumn.
///
/// 2) Sometimes we have to read data from different Storages to execute a query.
/// For example, the 'remote' function requires reading data from a local table (for example, MergeTree) and
/// from a remote table (about which it knows nothing).
///
/// If these two cases combine, our heuristic from (1) can choose completely different columns,
/// so the streams generated with these actions will have different headers. To avoid this, we additionally rename our "redundant" column
/// to DUMMY_COLUMN_NAME with the help of a COPY_COLUMN action and a subsequent removal of the original column.
/// This doesn't affect any logic, but all streams will have the same "redundant" column in their header, called "_dummy".
/// Also, it seems like the "redundant" column will always have the same type (UInt8), but that's not obvious.
bool dummy_column_copied = false;
/// We will not throw out all the input columns, so as not to lose the number of rows in the block.
if (needed_columns.empty() && !input_columns.empty())
needed_columns.insert(getSmallestColumn(input_columns));
{
auto colname = getSmallestColumn(input_columns);
needed_columns.insert(colname);
actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true));
dummy_column_copied = true;
}
/// We will not leave the block empty so as not to lose the number of rows in it.
if (final_columns.empty() && !input_columns.empty())
final_columns.insert(getSmallestColumn(input_columns));
{
auto colname = getSmallestColumn(input_columns);
final_columns.insert(DUMMY_COLUMN_NAME);
if (!dummy_column_copied) /// otherwise we already have this column
actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true));
}
for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();)
{
@@ -948,9 +985,9 @@ void ExpressionActions::finalize(const Names & output_columns)
}
/* std::cerr << "\n";
for (const auto & action : actions)
std::cerr << action.toString() << "\n";
std::cerr << "\n";*/
/// Deletes unnecessary temporary columns.
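
The dummy-column scheme described in the long comment near the top of this file's diff can be condensed into a standalone sketch. All names and sizes below are illustrative assumptions, not the actual ClickHouse API; the point is only why renaming the kept column to one fixed name makes the stream headers agree:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct ColumnDesc
{
    std::string name;
    std::size_t type_size;  /// stand-in for whatever size estimate getSmallestColumn uses
};

/// Mirrors the role of getSmallestColumn: keep the cheapest column.
std::string smallestColumn(const std::vector<ColumnDesc> & columns)
{
    return std::min_element(columns.begin(), columns.end(),
        [](const ColumnDesc & l, const ColumnDesc & r) { return l.type_size < r.type_size; })->name;
}

int main()
{
    /// Two streams of one query may see different column sets and therefore
    /// pick different "smallest" columns...
    std::vector<ColumnDesc> local_stream = {{"APIKey", 4}, {"Data", 64}};
    std::vector<ColumnDesc> remote_stream = {{"DeviceIDHash", 8}, {"Data", 64}};

    /// ...but once COPY_COLUMN exposes the pick under one fixed name, both
    /// headers carry the same "_dummy" column and match.
    std::cout << smallestColumn(local_stream) << " -> _dummy\n";
    std::cout << smallestColumn(remote_stream) << " -> _dummy\n";
}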

View File

@@ -257,9 +257,13 @@ public:
};
private:
/// These columns have to be in input blocks (arguments of execute* methods)
NamesAndTypesList input_columns;
/// These actions will be executed on input blocks
Actions actions;
/// The example of result (output) block.
Block sample_block;
Settings settings;
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compilation_cache;

View File

@@ -0,0 +1,12 @@
<yandex>
<shutdown_wait_unfinished>3</shutdown_wait_unfinished>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
</yandex>

View File

@@ -0,0 +1,35 @@
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException, QueryTimeoutExceedException
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/log_conf.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/log_conf.xml'])
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
for node in [node1, node2]:
node.query("""
CREATE TABLE test_table(
APIKey UInt32,
CustomAttributeId UInt64,
ProfileIDHash UInt64,
DeviceIDHash UInt64,
Data String)
ENGINE = SummingMergeTree()
ORDER BY (APIKey, CustomAttributeId, ProfileIDHash, DeviceIDHash, intHash32(DeviceIDHash))
""")
yield cluster
finally:
cluster.shutdown()
def test_remote(start_cluster):
assert node1.query("SELECT 1 FROM remote('node{1,2}', default.test_table) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1") == ""

View File

@@ -0,0 +1,23 @@
-2000 -1 1
-1
-1
-1
0
0
0
0
0
0
0
0
0
-1
0
0
0
0
0
0
0
0
0

View File

@@ -0,0 +1,19 @@
SELECT
sum(ASD) AS asd,
intDiv(toInt64(asd), abs(toInt64(asd))) AS int_div_with_abs,
intDiv(toInt64(asd), toInt64(asd)) AS int_div_without_abs
FROM
(
SELECT ASD
FROM
(
SELECT [-1000, -1000] AS asds
)
ARRAY JOIN asds AS ASD
);
SELECT intDivOrZero(CAST(-1000, 'Int64'), CAST(1000, 'UInt64'));
SELECT intDivOrZero(CAST(-1000, 'Int64'), CAST(1000, 'Int64'));
SELECT intDiv(-1, number) FROM numbers(1, 10);
SELECT intDivOrZero(-1, number) FROM numbers(1, 10);