Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
Alexey Milovidov 2020-03-26 13:33:11 +03:00
commit 656234ab5c
18 changed files with 230 additions and 60 deletions

View File

@ -833,6 +833,19 @@ public:
type_res = std::make_shared<LeftDataType>(left.getPrecision(), left.getScale());
else if constexpr (IsDataTypeDecimal<RightDataType>)
type_res = std::make_shared<RightDataType>(right.getPrecision(), right.getScale());
else if constexpr (std::is_same_v<ResultDataType, DataTypeDateTime>)
{
// Special case for DateTime: binary OPS should reuse timezone
// of DateTime argument as timezone of result type.
// NOTE: binary plus/minus are not allowed on DateTime64, and we are not handling it here.
const TimezoneMixin * tz = nullptr;
if constexpr (std::is_same_v<RightDataType, DataTypeDateTime>)
tz = &right;
if constexpr (std::is_same_v<LeftDataType, DataTypeDateTime>)
tz = &left;
type_res = std::make_shared<ResultDataType>(*tz);
}
else
type_res = std::make_shared<ResultDataType>();
return true;

View File

@ -42,9 +42,9 @@ std::string extractTimeZoneNameFromFunctionArguments(const ColumnsWithTypeAndNam
return {};
/// If time zone is attached to an argument of type DateTime.
if (const DataTypeDateTime * type = checkAndGetDataType<DataTypeDateTime>(arguments[datetime_arg_num].type.get()))
if (const auto * type = checkAndGetDataType<DataTypeDateTime>(arguments[datetime_arg_num].type.get()))
return type->getTimeZone().getTimeZone();
if (const DataTypeDateTime64 * type = checkAndGetDataType<DataTypeDateTime64>(arguments[datetime_arg_num].type.get()))
if (const auto * type = checkAndGetDataType<DataTypeDateTime64>(arguments[datetime_arg_num].type.get()))
return type->getTimeZone().getTimeZone();
return {};
@ -61,7 +61,9 @@ const DateLUTImpl & extractTimeZoneFromFunctionArguments(Block & block, const Co
return DateLUT::instance();
/// If time zone is attached to an argument of type DateTime.
if (const DataTypeDateTime * type = checkAndGetDataType<DataTypeDateTime>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
if (const auto * type = checkAndGetDataType<DataTypeDateTime>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
return type->getTimeZone();
if (const auto * type = checkAndGetDataType<DataTypeDateTime64>(block.getByPosition(arguments[datetime_arg_num]).type.get()))
return type->getTimeZone();
return DateLUT::instance();

View File

@ -314,7 +314,7 @@ public:
size_t result_size = pattern_to_fill.size();
const DateLUTImpl * time_zone_tmp = nullptr;
if (arguments.size() == 3)
if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
else
time_zone_tmp = &DateLUT::instance();

View File

@ -98,8 +98,8 @@ private:
throw Exception("Distributed table should have an alias when distributed_product_mode set to local.",
ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED);
database_and_table = createTableIdentifier(database, table);
database_and_table->setAlias(alias);
auto & identifier = database_and_table->as<ASTIdentifier &>();
identifier.resetTable(database, table);
}
else
throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting",

View File

@ -620,38 +620,23 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
}
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
}
auto metadata = storage->getInMemoryMetadata();
/// We have to execute select in order of primary key
/// because we don't sort results additionally and don't have
/// any guarantees on data order without ORDER BY. It's almost free, because we
/// have optimization for data read in primary key order.
if (metadata.order_by_ast)
if (ASTPtr key_expr = storage->getSortingKeyAST(); key_expr && !key_expr->children.empty())
{
ASTPtr dummy;
ASTPtr key_expr;
if (metadata.primary_key_ast)
key_expr = metadata.primary_key_ast;
else
key_expr = metadata.order_by_ast;
bool empty = false;
/// In all other cases we cannot have empty key
if (auto key_function = key_expr->as<ASTFunction>())
empty = key_function->arguments->children.empty();
/// Not explicitely spicified empty key
if (!empty)
auto res = std::make_shared<ASTExpressionList>();
for (const auto & key_part : key_expr->children)
{
auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);
order_by_expr->children.push_back(key_part);
order_by_expr->children.push_back(key_expr);
auto res = std::make_shared<ASTExpressionList>();
res->children.push_back(order_by_expr);
select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
}
select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
}
return select;

View File

@ -101,6 +101,15 @@ void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const
writeString(name, ostr);
}
void ASTIdentifier::resetTable(const String & database_name, const String & table_name)
{
auto ast = createTableIdentifier(database_name, table_name);
auto & ident = ast->as<ASTIdentifier &>();
name.swap(ident.name);
name_parts.swap(ident.name_parts);
uuid = ident.uuid;
}
ASTPtr createTableIdentifier(const String & database_name, const String & table_name)
{
assert(database_name != "_temporary_and_external_tables");

View File

@ -49,6 +49,8 @@ public:
return name;
}
void resetTable(const String & database_name, const String & table_name);
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;

View File

@ -13,3 +13,10 @@ fun:BN_add_word
fun:bn_div_fixed_top
fun:bn_mul_words
fun:BN_cmp
# Suppress some failures in contrib so that we can enable MSan in CI.
# Ideally, we should report these upstream.
src:*/contrib/zlib-ng/*
src:*/contrib/openssl/*
src:*/contrib/simdjson/*

View File

@ -1,3 +1,3 @@
2019-09-16 19:20:11.000
2019-05-03 11:25:25.123 2019-05-03 2019-05-02 21:00:00 2019-04-01 1970-01-02 11:25:25 2019-05-03 11:25:00
2019-05-03 11:25:25.123 2019-05-03 2019-05-03 00:00:00 2019-04-01 1970-01-02 11:25:25 2019-05-03 11:25:00
2019-09-16 19:20:11.234

View File

@ -68,13 +68,13 @@ subtractHours(N, 1)
subtractMinutes(N, 1)
subtractSeconds(N, 1)
subtractQuarters(N, 1)
CAST(N as DateTime)
CAST(N as DateTime('Europe/Minsk'))
CAST(N as Date)
CAST(N as UInt64)
CAST(N as DateTime64(0))
CAST(N as DateTime64(3))
CAST(N as DateTime64(6))
CAST(N as DateTime64(9))
CAST(N as DateTime64(0, 'Europe/Minsk'))
CAST(N as DateTime64(3, 'Europe/Minsk'))
CAST(N as DateTime64(6, 'Europe/Minsk'))
CAST(N as DateTime64(9, 'Europe/Minsk'))
# Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
# CAST(N as DateTime64(12))
# DateTime64(18) will always fail due to zero precision, but it is Ok to test here:

View File

@ -310,10 +310,10 @@ SELECT subtractQuarters(N, 1)
"DateTime('Europe/Minsk')","2019-06-16 19:20:11"
"DateTime64(3, 'Europe/Minsk')","2019-06-16 19:20:11.234"
------------------------------------------
SELECT CAST(N as DateTime)
"DateTime","2019-09-16 00:00:00"
"DateTime","2019-09-16 19:20:11"
"DateTime","2019-09-16 19:20:11"
SELECT CAST(N as DateTime(\'Europe/Minsk\'))
"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
"DateTime('Europe/Minsk')","2019-09-16 19:20:11"
"DateTime('Europe/Minsk')","2019-09-16 19:20:11"
------------------------------------------
SELECT CAST(N as Date)
"Date","2019-09-16"
@ -325,25 +325,25 @@ SELECT CAST(N as UInt64)
"UInt64",1568650811
"UInt64",1568650811
------------------------------------------
SELECT CAST(N as DateTime64(0))
"DateTime64(0)","2019-09-16 00:00:00"
"DateTime64(0)","2019-09-16 19:20:11"
"DateTime64(0)","2019-09-16 19:20:11"
SELECT CAST(N as DateTime64(0, \'Europe/Minsk\'))
"DateTime64(0, 'Europe/Minsk')","2019-09-16 00:00:00"
"DateTime64(0, 'Europe/Minsk')","2019-09-16 19:20:11"
"DateTime64(0, 'Europe/Minsk')","2019-09-16 19:20:11"
------------------------------------------
SELECT CAST(N as DateTime64(3))
"DateTime64(3)","2019-09-16 00:00:00.000"
"DateTime64(3)","2019-09-16 19:20:11.000"
"DateTime64(3)","2019-09-16 19:20:11.234"
SELECT CAST(N as DateTime64(3, \'Europe/Minsk\'))
"DateTime64(3, 'Europe/Minsk')","2019-09-16 00:00:00.000"
"DateTime64(3, 'Europe/Minsk')","2019-09-16 19:20:11.000"
"DateTime64(3, 'Europe/Minsk')","2019-09-16 19:20:11.234"
------------------------------------------
SELECT CAST(N as DateTime64(6))
"DateTime64(6)","2019-09-16 00:00:00.000000"
"DateTime64(6)","2019-09-16 19:20:11.000000"
"DateTime64(6)","2019-09-16 19:20:11.234000"
SELECT CAST(N as DateTime64(6, \'Europe/Minsk\'))
"DateTime64(6, 'Europe/Minsk')","2019-09-16 00:00:00.000000"
"DateTime64(6, 'Europe/Minsk')","2019-09-16 19:20:11.000000"
"DateTime64(6, 'Europe/Minsk')","2019-09-16 19:20:11.234000"
------------------------------------------
SELECT CAST(N as DateTime64(9))
"DateTime64(9)","2019-09-16 00:00:00.000000000"
"DateTime64(9)","2019-09-16 19:20:11.000000000"
"DateTime64(9)","2019-09-16 19:20:11.234000000"
SELECT CAST(N as DateTime64(9, \'Europe/Minsk\'))
"DateTime64(9, 'Europe/Minsk')","2019-09-16 00:00:00.000000000"
"DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.000000000"
"DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.234000000"
------------------------------------------
SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\')
"String","20 16 09/16/19 16 2019-09-16 00 12 259 09 00 AM 00:00 00 00:00:00 1 38 1 19 2019 %"

View File

@ -0,0 +1,36 @@
-- Test script: INNER JOIN ... USING combined with PREWHERE on a column
-- (NewColumn) that exists in both the left table and the joined subquery,
-- where the left table received that column via ALTER after its first insert.
-- NOTE(review): presumably a regression test for PREWHERE handling with JOIN - confirm against the accompanying .reference file.
DROP TABLE IF EXISTS test_a;
DROP TABLE IF EXISTS test_b;
-- test_a is created WITHOUT NewColumn; the column is added by the ALTER below.
CREATE TABLE test_a
(
OldColumn String DEFAULT '',
EventDate Date DEFAULT toDate(EventTime),
EventTime DateTime
) ENGINE = MergeTree(EventDate, EventTime, 8192);
-- test_b has NewColumn from the start.
CREATE TABLE test_b
(
OldColumn String DEFAULT '',
NewColumn String DEFAULT '',
EventDate Date DEFAULT toDate(EventTime),
EventTime DateTime
) ENGINE = MergeTree(EventDate, EventTime, 8192);
-- This row is inserted into test_a before NewColumn exists there.
INSERT INTO test_a (OldColumn, EventTime) VALUES('1', now());
INSERT INTO test_b (OldColumn, NewColumn, EventTime) VALUES('1', '1a', now());
INSERT INTO test_b (OldColumn, NewColumn, EventTime) VALUES('2', '2a', now());
-- Add NewColumn to test_a; the row inserted above was written without it.
ALTER TABLE test_a ADD COLUMN NewColumn String DEFAULT '' AFTER OldColumn;
-- This row is inserted after the ALTER and carries an explicit NewColumn value.
INSERT INTO test_a (OldColumn, NewColumn, EventTime) VALUES('2', '2a', now());
-- The query under test: join on OldColumn and filter with PREWHERE on NewColumn.
SELECT NewColumn
FROM test_a
INNER JOIN
(SELECT OldColumn, NewColumn FROM test_b) s
Using OldColumn
PREWHERE NewColumn != '';
DROP TABLE test_a;
DROP TABLE test_b;

View File

@ -0,0 +1,24 @@
-- Test script: a query over a Distributed table whose PREWHERE contains an
-- IN subquery over another Distributed table, executed with
-- distributed_product_mode = 'local'. Both Distributed tables are aliased.
-- NOTE(review): presumably checks that the subquery's table is rewritten without an error - confirm against the accompanying .reference file.
DROP TABLE IF EXISTS hits;
DROP TABLE IF EXISTS visits;
DROP TABLE IF EXISTS hits_layer;
DROP TABLE IF EXISTS visits_layer;
-- Local MergeTree tables that back the Distributed tables below.
CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate);
CREATE TABLE hits(EventDate Date, WatchID UInt8) ENGINE MergeTree ORDER BY(EventDate);
-- Distributed tables over the local ones, in the current database, on cluster test_cluster_two_shards_localhost.
CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits');
CREATE TABLE hits_layer(EventDate Date, WatchID UInt8) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'hits');
SET distributed_product_mode = 'local';
-- The query under test; WHERE 0 filters out every row, so no data needs to be inserted.
SELECT 0 FROM hits_layer AS hl
PREWHERE WatchID IN
(
SELECT 0 FROM visits_layer AS vl
)
WHERE 0;
DROP TABLE hits;
DROP TABLE visits;
DROP TABLE hits_layer;
DROP TABLE visits_layer;

View File

@ -1,6 +1,6 @@
DROP TABLE IF EXISTS table_with_pk;
DROP TABLE IF EXISTS table_with_single_pk;
CREATE TABLE table_with_pk
CREATE TABLE table_with_single_pk
(
key UInt8,
value String
@ -8,9 +8,9 @@ CREATE TABLE table_with_pk
ENGINE = MergeTree
ORDER BY key;
INSERT INTO table_with_pk SELECT number, toString(number % 10) FROM numbers(10000000);
INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000);
ALTER TABLE table_with_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;
ALTER TABLE table_with_single_pk DELETE WHERE key % 77 = 0 SETTINGS mutations_sync = 1;
SYSTEM FLUSH LOGS;
@ -20,6 +20,92 @@ SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_pk' AND database = currentDatabase();
WHERE event_type = 'MutatePart' AND table = 'table_with_single_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_pk;
DROP TABLE IF EXISTS table_with_single_pk;
DROP TABLE IF EXISTS table_with_multi_pk;
CREATE TABLE table_with_multi_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY (key1, key2, key3);
INSERT INTO table_with_multi_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
ALTER TABLE table_with_multi_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and less than
-- read_bytes.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_multi_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_multi_pk;
DROP TABLE IF EXISTS table_with_function_pk;
CREATE TABLE table_with_function_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY (cast(value as UInt64), key2);
INSERT INTO table_with_function_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
ALTER TABLE table_with_function_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and less than
-- read_bytes.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_with_function_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_with_function_pk;
DROP TABLE IF EXISTS table_without_pk;
CREATE TABLE table_without_pk
(
key1 UInt8,
key2 UInt32,
key3 DateTime64(6, 'UTC'),
value String
)
ENGINE = MergeTree
ORDER BY tuple();
INSERT INTO table_without_pk SELECT number % 32, number, toDateTime('2019-10-01 00:00:00'), toString(number % 10) FROM numbers(10000000);
ALTER TABLE table_without_pk DELETE WHERE key1 % 77 = 0 SETTINGS mutations_sync = 1;
SYSTEM FLUSH LOGS;
-- Memory usage for all mutations must be almost constant and less than
-- read_bytes.
SELECT
DISTINCT read_bytes >= peak_memory_usage
FROM
system.part_log
WHERE event_type = 'MutatePart' AND table = 'table_without_pk' AND database = currentDatabase();
DROP TABLE IF EXISTS table_without_pk;

View File

@ -167,6 +167,8 @@ function get_profiles
left/clickhouse client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
right/clickhouse client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0"
right/clickhouse client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
left/clickhouse client --port 9001 --query "system flush logs"
right/clickhouse client --port 9002 --query "system flush logs"
left/clickhouse client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
left/clickhouse client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
@ -395,7 +397,7 @@ unset IFS
# Remember that grep sets error code when nothing is found, hence the bayan
# operator.
grep -H -m2 'Exception:[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||:
grep -H -m2 '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||:
}
case "$stage" in