Performance test

This commit is contained in:
Michael Kolupaev 2023-08-17 18:00:34 +00:00
parent 2f4d433e69
commit d752611c43
4 changed files with 19 additions and 4 deletions

View File

@ -389,7 +389,9 @@ ParquetBlockInputFormat::~ParquetBlockInputFormat()
void ParquetBlockInputFormat::setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context)
{
if (format_settings.parquet.filter_push_down)
/// When analyzer is enabled, query_info.filter_asts is missing sets and maybe some type casts,
/// so don't use it. I'm not sure how to support analyzer here: https://github.com/ClickHouse/ClickHouse/issues/53536
if (format_settings.parquet.filter_push_down && !context->getSettingsRef().allow_experimental_analyzer)
key_condition.emplace(query_info, context, getPort().getHeader().getNames(),
std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(
getPort().getHeader().getColumnsWithTypeAndName())));
@ -428,7 +430,7 @@ void ParquetBlockInputFormat::initializeIfNeeded()
if (key_condition.has_value() &&
!key_condition->checkInHyperrectangle(
getHyperrectangleForRowGroup(*metadata, idx, getPort().getHeader(), format_settings),
getHyperrectangleForRowGroup(*metadata, row_group, getPort().getHeader(), format_settings),
getPort().getHeader().getDataTypes()).can_be_true)
continue;

View File

@ -4,11 +4,11 @@ This directory contains `.xml`-files with performance tests for @akuzm tool.
### How to write performance test
First of all you should check existing tests don't cover your case. If there are no such tests than you should write your own.
First of all you should check existing tests don't cover your case. If there are no such tests then you should write your own.
You can use `substitutions`, `create`, `fill` and `drop` queries to prepare a test. You can find examples in this folder.
If your test continued more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones.
If your test takes more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones.
### How to run performance test

View File

@ -0,0 +1,9 @@
<test>
<!--
    Performance test: a single-threaded range filter on `key` over a table stored with
    engine File(Parquet).
    The table is created with output_format_parquet_row_group_size=100000 and filled with
    2000000 rows whose key is `number` from numbers(2000000); the measured query reads only
    keys between 1050000 and 1150000 and runs with max_threads=1.
    NOTE(review): presumably this is meant to measure Parquet filter push-down (skipping row
    groups whose key range cannot match); confirm against the Parquet input format code.
    NOTE(review): the fill uses 16 insert threads, so which keys land in which row group is
    not evident from this file; verify if the test relies on a specific layout.
-->
<create_query>create table if not exists t (key UInt64, value String) engine = File(Parquet) settings output_format_parquet_use_custom_encoder=1, output_format_parquet_row_group_size=100000</create_query>
<fill_query>insert into t select number, toString(number) from numbers(2000000) settings max_threads=16, max_insert_threads=16, max_insert_block_size=100000, max_block_size=100000</fill_query>
<query>select sum(cityHash64(*)) from t where key between 1050000 and 1150000 settings max_threads=1</query>
<drop_query>drop table if exists t</drop_query>
</test>

View File

@ -8,6 +8,10 @@ set optimize_or_like_chain = 0;
set max_block_size = 100000;
set max_insert_threads = 1;
-- Analyzer breaks the queries with IN and some queries with BETWEEN.
-- TODO: Figure out why.
set allow_experimental_analyzer=0;
-- Try all the types.
insert into function file('02841.parquet')
-- Use negative numbers to test sign extension for signed types and lack of sign extension for