Merge branch 'master' of github.com:yandex/ClickHouse

Alexey Milovidov 2019-08-01 01:49:24 +03:00
commit bf524b4419
9 changed files with 152 additions and 10 deletions

View File

@@ -55,7 +55,13 @@ struct DivideIntegralImpl
static inline Result apply(A a, B b)
{
throwIfDivisionLeadsToFPE(a, b);
return a / b;
/// Otherwise overflow may occur due to integer promotion. Example: int8_t(-1) / uint64_t(2).
/// NOTE: overflow is still possible when dividing a large signed number by a large unsigned number, or vice versa. But it's less harmful.
if constexpr (std::is_integral_v<A> && std::is_integral_v<B> && (std::is_signed_v<A> || std::is_signed_v<B>))
return std::make_signed_t<A>(a) / std::make_signed_t<B>(b);
else
return a / b;
}
#if USE_EMBEDDED_COMPILER
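
The promotion hazard the comment describes is easy to reproduce in isolation. A minimal standalone sketch in plain C++ (nothing below is taken from the ClickHouse sources):

#include <cstdint>
#include <iostream>
#include <type_traits>

int main()
{
    int8_t a = -1;
    uint64_t b = 2;

    /// Usual arithmetic conversions promote `a` to uint64_t: -1 becomes
    /// 18446744073709551615, so the quotient is 9223372036854775807, not 0.
    std::cout << a / b << '\n';

    /// Casting both operands to their signed counterparts, as the patch does,
    /// yields the expected 0.
    std::cout << std::make_signed_t<int8_t>(a) / std::make_signed_t<uint64_t>(b) << '\n';
}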

View File

@@ -1,6 +1,9 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionBinaryArithmetic.h>
#include "intDiv.h"
namespace DB
{
@@ -12,7 +15,10 @@ struct DivideIntegralOrZeroImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return unlikely(divisionLeadsToFPE(a, b)) ? 0 : a / b;
if (unlikely(divisionLeadsToFPE(a, b)))
return 0;
return DivideIntegralImpl<A, B>::template apply<Result>(a, b);
}
#if USE_EMBEDDED_COMPILER
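
For context, the FPE this guard avoids comes in two flavors: division by zero, and the minimum signed value divided by -1 (whose true quotient is unrepresentable); both trap with SIGFPE on x86. A hedged sketch of what such a check has to cover — the real divisionLeadsToFPE is defined elsewhere in the sources and may differ in detail:

#include <cstdint>
#include <limits>

bool wouldTrap(int64_t a, int64_t b)
{
    if (b == 0)
        return true;    /// integer division by zero raises SIGFPE
    if (a == std::numeric_limits<int64_t>::min() && b == -1)
        return true;    /// -2^63 / -1 == 2^63 doesn't fit into int64_t
    return false;
}

int main()
{
    /// Both of these would trap if evaluated directly; intDivOrZero returns 0 instead.
    return wouldTrap(1, 0) && wouldTrap(std::numeric_limits<int64_t>::min(), -1) ? 0 : 1;
}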

View File

@@ -35,6 +35,8 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
}
/// Read comment near usage
static constexpr auto DUMMY_COLUMN_NAME = "_dummy";
Names ExpressionAction::getNeededColumns() const
{
@@ -508,11 +510,15 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
if (can_replace && block.has(result_name))
{
auto & result = block.getByName(result_name);
result.type = result_type;
result.column = block.getByName(source_name).column;
const auto & source = block.getByName(source_name);
result.type = source.type;
result.column = source.column;
}
else
block.insert({ block.getByName(source_name).column, result_type, result_name });
{
const auto & source_column = block.getByName(source_name);
block.insert({source_column.column, source_column.type, result_name});
}
break;
}
@@ -927,13 +933,44 @@ void ExpressionActions::finalize(const Names & output_columns)
}
}
/// 1) Sometimes we don't need any columns to perform actions, and sometimes actions don't produce any columns as a result.
/// But the Block class doesn't store any information about its structure itself; it takes that information from its columns.
/// If we remove all columns from the input or output block, we lose the information about the number of rows in it.
/// To avoid this situation we always leave one of the columns in the required (input) columns
/// and in the output columns. We choose that "redundant" column by size, with the help of getSmallestColumn.
///
/// 2) Sometimes we have to read data from different Storages to execute a query.
/// For example, the 'remote' function requires reading data from a local table (for example, MergeTree) and
/// from a remote table (about which it knows nothing).
///
/// If these two cases combine, our heuristic from (1) can choose completely different columns,
/// so the streams generated with these actions will have different headers. To avoid this, we additionally rename our "redundant" column
/// to DUMMY_COLUMN_NAME with the help of a COPY_COLUMN action and a subsequent removal of the original column.
/// This doesn't affect any logic, but all streams will have the same "redundant" column in their header, called "_dummy".
/// Also, it seems like the "redundant" column will always have the same type (UInt8), but that's not obvious.
bool dummy_column_copied = false;
/// We will not throw out all the input columns, so as not to lose the number of rows in the block.
if (needed_columns.empty() && !input_columns.empty())
needed_columns.insert(getSmallestColumn(input_columns));
{
auto colname = getSmallestColumn(input_columns);
needed_columns.insert(colname);
actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true));
dummy_column_copied = true;
}
/// We will not leave the block empty so as not to lose the number of rows in it.
if (final_columns.empty() && !input_columns.empty())
final_columns.insert(getSmallestColumn(input_columns));
{
auto colname = getSmallestColumn(input_columns);
final_columns.insert(DUMMY_COLUMN_NAME);
if (!dummy_column_copied) /// otherwise we already have this column
actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true));
}
for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();)
{
@@ -948,9 +985,9 @@ void ExpressionActions::finalize(const Names & output_columns)
}
/* std::cerr << "\n";
for (const auto & action : actions)
std::cerr << action.toString() << "\n";
std::cerr << "\n";*/
/// Deletes unnecessary temporary columns.
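
The dummy-column scheme described in the long comment near the top of this file's diff can be condensed into a standalone sketch. All names and sizes below are illustrative assumptions, not the actual ClickHouse API; the point is only why renaming the kept column to one fixed name makes the stream headers agree:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct ColumnDesc
{
    std::string name;
    std::size_t type_size;  /// stand-in for whatever size estimate getSmallestColumn uses
};

/// Mirrors the role of getSmallestColumn: keep the cheapest column.
std::string smallestColumn(const std::vector<ColumnDesc> & columns)
{
    return std::min_element(columns.begin(), columns.end(),
        [](const ColumnDesc & l, const ColumnDesc & r) { return l.type_size < r.type_size; })->name;
}

int main()
{
    /// Two streams of one query may see different column sets and therefore
    /// pick different "smallest" columns...
    std::vector<ColumnDesc> local_stream = {{"APIKey", 4}, {"Data", 64}};
    std::vector<ColumnDesc> remote_stream = {{"DeviceIDHash", 8}, {"Data", 64}};

    /// ...but once COPY_COLUMN exposes the pick under one fixed name, both
    /// headers carry the same "_dummy" column and match.
    std::cout << smallestColumn(local_stream) << " -> _dummy\n";
    std::cout << smallestColumn(remote_stream) << " -> _dummy\n";
}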

View File

@@ -257,9 +257,13 @@ public:
};
private:
/// These columns have to be in input blocks (arguments of execute* methods)
NamesAndTypesList input_columns;
/// These actions will be executed on input blocks
Actions actions;
/// The example of result (output) block.
Block sample_block;
Settings settings;
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compilation_cache;

View File

@@ -0,0 +1,12 @@
<yandex>
<shutdown_wait_unfinished>3</shutdown_wait_unfinished>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
</yandex>

View File

@@ -0,0 +1,35 @@
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException, QueryTimeoutExceedException
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/log_conf.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/log_conf.xml'])
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
for node in [node1, node2]:
node.query("""
CREATE TABLE test_table(
APIKey UInt32,
CustomAttributeId UInt64,
ProfileIDHash UInt64,
DeviceIDHash UInt64,
Data String)
ENGINE = SummingMergeTree()
ORDER BY (APIKey, CustomAttributeId, ProfileIDHash, DeviceIDHash, intHash32(DeviceIDHash))
""")
yield cluster
finally:
cluster.shutdown()
def test_remote(start_cluster):
assert node1.query("SELECT 1 FROM remote('node{1,2}', default.test_table) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1") == ""

View File

@@ -0,0 +1,23 @@
-2000 -1 1
-1
-1
-1
0
0
0
0
0
0
0
0
0
-1
0
0
0
0
0
0
0
0
0

View File

@@ -0,0 +1,19 @@
SELECT
sum(ASD) AS asd,
intDiv(toInt64(asd), abs(toInt64(asd))) AS int_div_with_abs,
intDiv(toInt64(asd), toInt64(asd)) AS int_div_without_abs
FROM
(
SELECT ASD
FROM
(
SELECT [-1000, -1000] AS asds
)
ARRAY JOIN asds AS ASD
);
SELECT intDivOrZero(CAST(-1000, 'Int64'), CAST(1000, 'UInt64'));
SELECT intDivOrZero(CAST(-1000, 'Int64'), CAST(1000, 'Int64'));
SELECT intDiv(-1, number) FROM numbers(1, 10);
SELECT intDivOrZero(-1, number) FROM numbers(1, 10);