Merge pull request #64492 from 0xfei/master

Add profile event `SelectQueriesWithPrimaryKeyUsage`
This commit is contained in:
Robert Schulze 2024-06-21 10:13:45 +00:00 committed by GitHub
commit 6dd675579f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 121 additions and 4 deletions

View File

@ -15,6 +15,7 @@
M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(QueriesWithSubqueries, "Count queries with all subqueries") \
M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \
M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \
M(SelectQueriesWithPrimaryKeyUsage, "Count SELECT queries which use the primary key to evaluate the WHERE condition") \
M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \
M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \
M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \

View File

@ -120,6 +120,7 @@ namespace ProfileEvents
extern const Event SelectedParts; extern const Event SelectedParts;
extern const Event SelectedRanges; extern const Event SelectedRanges;
extern const Event SelectedMarks; extern const Event SelectedMarks;
extern const Event SelectQueriesWithPrimaryKeyUsage;
} }
namespace DB namespace DB
@ -1569,11 +1570,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
if (indexes->part_values && indexes->part_values->empty()) if (indexes->part_values && indexes->part_values->empty())
return std::make_shared<AnalysisResult>(std::move(result)); return std::make_shared<AnalysisResult>(std::move(result));
if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) if (indexes->key_condition.alwaysUnknownOrTrue())
{ {
throw Exception(ErrorCodes::INDEX_NOT_USED, if (settings.force_primary_key)
"Primary key ({}) is not used and setting 'force_primary_key' is set", {
fmt::join(primary_key_column_names, ", ")); throw Exception(ErrorCodes::INDEX_NOT_USED,
"Primary key ({}) is not used and setting 'force_primary_key' is set",
fmt::join(primary_key_column_names, ", "));
}
} else
{
ProfileEvents::increment(ProfileEvents::SelectQueriesWithPrimaryKeyUsage);
} }
LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString());

View File

@ -0,0 +1,8 @@
selects_with_pk_usage
0
selects_with_pk_usage
0
selects_with_pk_usage
1
selects_with_pk_usage
1

View File

@ -0,0 +1,101 @@
#!/usr/bin/env bash
# Tests profile event "SelectQueriesWithPrimaryKeyUsage"
#
# For each test SELECT, the script prints the value of the profile event as recorded in
# system.query_log (one TSVWithNames header line + one value line per query).

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# Random suffix so the table name is unique to this run of the test.
table_id="$(random_str 10)"

# Prints ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] of the finished query with the given query id.
#   $1 - query id of the SELECT to look up in system.query_log
function print_pk_usage_event()
{
    local query_id="$1"

    # SYSTEM FLUSH LOGS runs first so that the entry of the just-finished query is visible in system.query_log.
    $CLICKHOUSE_CLIENT -mn -q "
        SYSTEM FLUSH LOGS;
        SELECT
            ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage
        FROM
            system.query_log
        WHERE
            current_database = currentDatabase()
            AND type = 'QueryFinish'
            AND query_id = '$query_id'
        FORMAT TSVWithNames;
    "
}

# Runs 'SELECT count(*)' on the test table under a fresh random query id, then prints the profile event for it.
#   $1 - optional WHERE clause (including the WHERE keyword); empty means no filter
function run_select_and_print_pk_usage_event()
{
    local where_clause="$1"
    local query_id
    query_id="$(random_str 10)"

    $CLICKHOUSE_CLIENT --query_id "$query_id" -q "
        SELECT count(*) FROM table_$table_id $where_clause FORMAT Null;"

    print_pk_usage_event "$query_id"
}

$CLICKHOUSE_CLIENT -q "
    DROP TABLE IF EXISTS table_$table_id;"

$CLICKHOUSE_CLIENT -q "
    CREATE TABLE table_$table_id (
        pk Int64,
        col1 Int64,
        col2 Int64,
        INDEX idx(col2) TYPE minmax
    ) ENGINE = MergeTree ORDER BY pk PARTITION BY (pk % 2);";

$CLICKHOUSE_CLIENT -q "
    ALTER TABLE table_$table_id ADD PROJECTION proj (SELECT * ORDER BY col1);"

# Populate two partitions with 50k rows each. Each partition has >1 granules.
# We want SelectQueriesWithPrimaryKeyUsage to increase by +1 in each query, not by +1 per partition or by +1 per granule.
$CLICKHOUSE_CLIENT -q "
    INSERT INTO table_$table_id SELECT number, number, number FROM numbers(100000);"

# Run SELECTs

# -- No filter
run_select_and_print_pk_usage_event ""

# -- Filter on non-PK column. However, it has a minmax-index defined. We expect the profile event to not increase.
run_select_and_print_pk_usage_event "WHERE col2 >= 50000"

# -- Filter on PK
run_select_and_print_pk_usage_event "WHERE pk >= 50000"

# -- Filter on PK in projection
run_select_and_print_pk_usage_event "WHERE col1 >= 50000"

$CLICKHOUSE_CLIENT -q "
    DROP TABLE table_$table_id;"