Merge branch 'master' into multi_auth_methods

Arthur Passos 2024-08-30 16:59:40 -03:00
commit a22f9fd91f
26 changed files with 168 additions and 91 deletions

View File

@ -62,6 +62,7 @@ Other upcoming meetups
* [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26

contrib/libfiu (vendored)

@ -1 +1 @@
Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5
Subproject commit a1290d8cd3d7b4541d6c976e0a54f572ac03f2a3

View File

@ -13,7 +13,8 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh"
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
numactl --hardware
echo > compare.log
numactl --hardware | tee -a compare.log
node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') ));
echo Will bind to NUMA node $node;
echo Will bind to NUMA node $node | tee -a compare.log
numactl --cpunodebind=$node --membind=$node $entry

View File

@ -49,6 +49,55 @@ SETTINGS cast_keep_nullable = 1
└──────────────────┴─────────────────────┴──────────────────┘
```
## toBool
Converts an input value to a value of type [`Bool`](../data-types/boolean.md). Throws an exception in case of an error.
**Syntax**
```sql
toBool(expr)
```
**Arguments**
- `expr` — Expression returning a number or a string. [Expression](../syntax.md#syntax-expressions).
Supported arguments:
- Values of type (U)Int8/16/32/64/128/256.
- Values of type Float32/64.
- Strings `true` or `false` (case-insensitive).
**Returned value**
- Returns `true` or `false` based on evaluation of the argument. [Bool](../data-types/boolean.md).
**Example**
Query:
```sql
SELECT
toBool(toUInt8(1)),
toBool(toInt8(-1)),
toBool(toFloat32(1.01)),
toBool('true'),
toBool('false'),
toBool('FALSE')
FORMAT Vertical
```
Result:
```response
toBool(toUInt8(1)): true
toBool(toInt8(-1)): true
toBool(toFloat32(1.01)): true
toBool('true'): true
toBool('false'): false
toBool('FALSE'): false
```
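A value outside the supported set throws rather than returning a default; as an illustrative sketch (not shown in the documentation above), a string other than `true`/`false` fails:
```sql
SELECT toBool('yes'); -- throws an exception: 'yes' is not a supported Bool value
```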
## toInt8
Converts an input value to a value of type [`Int8`](../data-types/int-uint.md). Throws an exception in case of an error.

View File

@ -10,7 +10,7 @@ title: The Lightweight DELETE Statement
The lightweight `DELETE` statement removes rows from the table `[db.]table` that match the expression `expr`. It is only available for the *MergeTree table engine family.
``` sql
DELETE FROM [db.]table [ON CLUSTER cluster] WHERE expr;
DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr;
```
It is called "lightweight `DELETE`" to contrast it with the [ALTER table DELETE](/en/sql-reference/statements/alter/delete) command, which is a heavyweight process.
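For example, with the optional `IN PARTITION` clause added in this change, a delete can be restricted to a single partition (a sketch mirroring the test added later in this commit; table and column names come from that test):
```sql
-- Only rows in partition '2024-08-01' are considered; other partitions are untouched.
DELETE FROM t_merge_tree IN PARTITION '2024-08-01' WHERE id = '1';
```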

View File

@ -35,9 +35,10 @@ class RegionsNames
M(et, ru, 11) \
M(pt, en, 12) \
M(he, en, 13) \
M(vi, en, 14)
M(vi, en, 14) \
M(es, en, 15)
static constexpr size_t total_languages = 15;
static constexpr size_t total_languages = 16;
public:
enum class Language : size_t

View File

@ -1009,8 +1009,14 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
size_t num_rows = executor.execute(*buffer);
total_rows += num_rows;
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
/// For some reason, the client can pass zero rows and bytes to the server.
/// We don't update offsets in this case, because deduplication assumes every insert has rows,
/// but there is nothing to deduplicate for an empty insert.
if (num_rows > 0)
{
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
}
add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms);
@ -1061,8 +1067,14 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());
total_rows += block->rows();
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
/// For some reason, the client can pass zero rows and bytes to the server.
/// We don't update offsets in this case, because deduplication assumes every insert has rows,
/// but there is nothing to deduplicate for an empty insert.
if (block->rows())
{
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
}
const auto & query_for_logging = get_query_by_format(entry->format);
add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);
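The guarded branches above skip offset and token bookkeeping whenever an entry carries zero rows. A minimal sketch of how such an entry can arise, assuming the table and setting names used by the dedup test later in this commit:
```sql
SET async_insert = 1;
-- The inline data section is empty, so the server receives zero rows and zero
-- bytes for this entry; its offset and dedup token must not be recorded.
INSERT INTO t_async_insert_dedup FORMAT JSONEachRow
```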

View File

@ -107,7 +107,9 @@ BlockIO InterpreterDeleteQuery::execute()
String alter_query =
"ALTER TABLE " + table->getStorageID().getFullTableName()
+ (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster))
+ " UPDATE `_row_exists` = 0 WHERE " + serializeAST(*delete_query.predicate);
+ " UPDATE `_row_exists` = 0"
+ (delete_query.partition ? " IN PARTITION " + serializeAST(*delete_query.partition) : "")
+ " WHERE " + serializeAST(*delete_query.predicate);
ParserAlterQuery parser;
ASTPtr alter_ast = parseQuery(
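Put together, the interpreter rewrites the lightweight statement into a mutation; roughly, a query like the first statement below becomes the ALTER on the last line (an illustrative pair based on the string being built above, not taken verbatim from the diff):
```sql
DELETE FROM db.tab IN PARTITION '2024-08-01' WHERE id = '1';
-- is rewritten internally to approximately:
ALTER TABLE db.tab UPDATE `_row_exists` = 0 IN PARTITION '2024-08-01' WHERE id = '1';
```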

View File

@ -45,6 +45,12 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
formatOnCluster(settings);
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
predicate->formatImpl(settings, state, frame);
}

View File

@ -19,6 +19,11 @@ public:
return removeOnCluster<ASTDeleteQuery>(clone(), params.default_database);
}
/** Used in DELETE FROM queries.
* The value or ID of the partition is stored here.
*/
ASTPtr partition;
ASTPtr predicate;
protected:

View File

@ -3,6 +3,7 @@
#include <Parsers/parseDatabaseAndTableName.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserPartition.h>
namespace DB
@ -15,11 +16,14 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserKeyword s_delete(Keyword::DELETE);
ParserKeyword s_from(Keyword::FROM);
ParserKeyword s_in_partition(Keyword::IN_PARTITION);
ParserKeyword s_where(Keyword::WHERE);
ParserExpression parser_exp_elem;
ParserKeyword s_settings(Keyword::SETTINGS);
ParserKeyword s_on{Keyword::ON};
ParserPartition parser_partition;
if (s_delete.ignore(pos, expected))
{
if (!s_from.ignore(pos, expected))
@ -36,6 +40,12 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->cluster = cluster_str;
}
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, query->partition, expected))
return false;
}
if (!s_where.ignore(pos, expected))
return false;
@ -53,6 +63,9 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
else
return false;
if (query->partition)
query->children.push_back(query->partition);
if (query->predicate)
query->children.push_back(query->predicate);

View File

@ -1,11 +0,0 @@
<clickhouse>
<remote_servers>
<test_cluster>
<shard>
<replica>
<host>node</host>
</replica>
</shard>
</test_cluster>
</remote_servers>
</clickhouse>

View File

@ -1,9 +0,0 @@
<clickhouse>
<logger>
<level>information</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
</clickhouse>

View File

@ -1,54 +0,0 @@
import logging
import pytest
from helpers.cluster import ClickHouseCluster
@pytest.fixture(scope="module")
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(
"node",
main_configs=[
"configs/config.d/cluster.xml",
],
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
node = cluster.instances["node"]
node.query(
"""
CREATE TABLE tab
(
a DateTime,
pk String
) Engine = MergeTree() ORDER BY pk;
"""
)
yield cluster
finally:
cluster.shutdown()
def test_incorrect_datetime_format(cluster):
"""
Test for an MSan issue caused by parsing an incorrect datetime string
"""
node = cluster.instances["node"]
res = node.query("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'").strip()
assert res == "0"
error = node.query_and_get_error(
"SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'"
).strip()
assert "Cannot parse time component of DateTime 09:58:0" in error
error = node.query_and_get_error(
"SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'"
).strip()
assert "Cannot convert string '2024-08-0 09:58:09' to type DateTime" in error

View File

@ -0,0 +1,3 @@
<clickhouse>
<cgroups_memory_usage_observer_wait_time>0</cgroups_memory_usage_observer_wait_time>
</clickhouse>

View File

@ -152,7 +152,7 @@ cat /proc/sys/kernel/core_pattern
{
time $SCRIPT_DIR/download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
time stage=configure "$script_path"/compare.sh ; \
} 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
} 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee -a compare.log
# Stop the servers to free memory. Normally they are restarted before getting
# the profile info, so they shouldn't use much, but if the comparison script

View File

@ -0,0 +1,4 @@
200
200
100
100

View File

@ -0,0 +1,23 @@
DROP TABLE IF EXISTS t_merge_tree SYNC;
DROP TABLE IF EXISTS t_replicated_merge_tree SYNC;
CREATE TABLE t_merge_tree(time Date, id String, name String) ENGINE = MergeTree() PARTITION BY time ORDER BY id;
CREATE TABLE t_replicated_merge_tree(time Date, id String, name String) ENGINE = ReplicatedMergeTree('/test/02352/{database}/t_rep','1') PARTITION BY time ORDER BY id;
INSERT INTO t_merge_tree select '2024-08-01', '1', toString(number) FROM numbers(100);
INSERT INTO t_merge_tree select '2024-08-02', '1', toString(number) FROM numbers(100);
INSERT INTO t_replicated_merge_tree select '2024-08-01', '1', toString(number) FROM numbers(100);
INSERT INTO t_replicated_merge_tree select '2024-08-02', '1', toString(number) FROM numbers(100);
SELECT COUNT() FROM t_merge_tree;
SELECT COUNT() FROM t_replicated_merge_tree;
DELETE FROM t_merge_tree IN PARTITION '2024-08-01' WHERE id = '1';
DELETE FROM t_replicated_merge_tree IN PARTITION '2024-08-01' WHERE id = '1';
SELECT COUNT() FROM t_merge_tree;
SELECT COUNT() FROM t_replicated_merge_tree;
DROP TABLE t_merge_tree SYNC;
DROP TABLE t_replicated_merge_tree SYNC;

View File

@ -48,9 +48,11 @@ def generate_data(q, total_number, use_token):
partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"]
last_number = 0
while True:
dup_simulate = random.randint(0, 3)
# 0 to simulate duplication
# 1 to simulate empty
simulate_flag = random.randint(0, 4)
# insert old data randomly. 20% of the inserts are dups.
if dup_simulate == 0:
if simulate_flag == 0:
last_idx = len(old_data) - 1
if last_idx < 0:
continue
@ -58,6 +60,11 @@ def generate_data(q, total_number, use_token):
if idx < 0:
idx = 0
q.put(old_data[idx])
if simulate_flag == 1:
empty_insert_stmt = (
"insert into t_async_insert_dedup values format JSONEachRow"
)
q.put((empty_insert_stmt, ""))
else:
# insert new data.
chunk_size = random.randint(1, max_chunk_size)

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, zookeeper, no-parallel, no-fasttest
# Tags: long, zookeeper, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, zookeeper, no-parallel, no-fasttest
# Tags: long, zookeeper, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,4 @@
-- Tags: no-fasttest, no-parallel
-- Tags: long, no-fasttest, no-parallel, no-tsan, no-msan, no-asan
set output_format_parquet_use_custom_encoder = 1;
set output_format_parquet_row_group_size = 1000;

View File

@ -0,0 +1,3 @@
0
OK
OK

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query "
CREATE TABLE tab
(
a DateTime,
pk String
) Engine = MergeTree() ORDER BY pk;
"
${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'"
${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'" 2>&1 | grep -F -q "Cannot parse time component of DateTime 09:58:0" && echo "OK" || echo "FAIL";
${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'" 2>&1 | grep -F -q "Cannot convert string '2024-08-0 09:58:09" && echo "OK" || echo "FAIL";

View File

@ -2730,6 +2730,7 @@ timeZoneOffset
timezones
tinylog
tmp
toBool
toColumnTypeName
toDate
toDateOrDefault