From 6eb75228e9754bcc3a05022f6750b67c32a1bd9a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 9 Apr 2021 12:33:33 +0300 Subject: [PATCH 1/7] FlatDictionary performance test fix --- tests/performance/flat_dictionary.xml | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index a571785a7f0..8cffd30c25e 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -25,7 +25,7 @@ ) PRIMARY KEY id SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_flat_dictionary_source_table')) - LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000)) + LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 50000000)) LIFETIME(MIN 0 MAX 1000) @@ -33,7 +33,7 @@ INSERT INTO simple_key_flat_dictionary_source_table SELECT number, number, toString(number), toDecimal64(number, 8), toString(number) FROM system.numbers - LIMIT 5000000; + LIMIT 50000000; @@ -50,30 +50,22 @@ elements_count - 2500000 - 5000000 - 7500000 - 10000000 + 25000000 + 50000000 + 75000000 + 100000000 - SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, number) - FROM system.numbers - LIMIT {elements_count} - FORMAT Null; - - - - SELECT dictHas('default.simple_key_flat_dictionary', number) + SELECT dictGet('default.simple_key_flat_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), number) FROM system.numbers LIMIT {elements_count} FORMAT Null; DROP TABLE IF EXISTS simple_key_flat_dictionary_source_table - DROP DICTIONARY IF EXISTS simple_key_flat_dictionary From f8c8b5e49dd03606dcf81a68a4030d17077fa013 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 9 Apr 2021 14:10:08 +0300 Subject: [PATCH 2/7] Updated test --- tests/performance/flat_dictionary.xml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 8cffd30c25e..bea359ab4b1 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -25,7 +25,7 @@ ) PRIMARY KEY id SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_flat_dictionary_source_table')) - LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 50000000)) + LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000)) LIFETIME(MIN 0 MAX 1000) @@ -33,7 +33,7 @@ INSERT INTO simple_key_flat_dictionary_source_table SELECT number, number, toString(number), toDecimal64(number, 8), toString(number) FROM system.numbers - LIMIT 50000000; + LIMIT 5000000; @@ -50,7 +50,6 @@ elements_count - 25000000 50000000 75000000 100000000 @@ -59,13 +58,21 @@ - SELECT dictGet('default.simple_key_flat_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), number) + SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, rand64() % toUInt64(10000000)) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + SELECT dictHas('default.simple_key_flat_dictionary', rand64() % toUInt64(10000000)) FROM system.numbers LIMIT {elements_count} FORMAT Null; DROP TABLE IF EXISTS simple_key_flat_dictionary_source_table + DROP DICTIONARY IF EXISTS simple_key_flat_dictionary From 1557161d92f55f4f9796a25f875457431c81a046 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 10 Apr 2021 00:55:54 +0300 Subject: [PATCH 3/7] Updated test --- tests/performance/flat_dictionary.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index bea359ab4b1..8111084586a 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -50,9 +50,9 @@ elements_count - 50000000 - 75000000 - 100000000 + 5000000 + 7500000 + 10000000 From 8abaf01a5d3ec5f41a29e3cf4915a34efc219b9a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 13 Apr 2021 15:57:11 +0300 Subject: [PATCH 4/7] Updated dictionaries tests --- tests/performance/direct_dictionary.xml | 24 ++++++++++++++---------- tests/performance/flat_dictionary.xml | 15 +++++++-------- tests/performance/hashed_dictionary.xml | 20 ++++++++++++++------ 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml index e827ea0a76f..3f01449ed99 100644 --- a/tests/performance/direct_dictionary.xml +++ b/tests/performance/direct_dictionary.xml @@ -55,14 +55,14 @@ INSERT INTO simple_key_direct_dictionary_source_table SELECT number, number, toString(number), toDecimal64(number, 8), toString(number) FROM system.numbers - LIMIT 100000; + LIMIT 50000; INSERT INTO complex_key_direct_dictionary_source_table SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number) FROM system.numbers - LIMIT 100000; + LIMIT 50000; @@ -79,47 +79,51 @@ elements_count - 25000 50000 75000 - 100000 - SELECT dictGet('default.simple_key_direct_dictionary', {column_name}, number) + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_direct_dictionary', {column_name}, key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictGet('default.simple_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), number) + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictHas('default.simple_key_direct_dictionary', number) + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictHas('default.simple_key_direct_dictionary', key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictGet('default.complex_key_direct_dictionary', {column_name}, (number, toString(number))) + WITH (number, toString(number)) as key + SELECT dictGet('default.complex_key_direct_dictionary', {column_name}, key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictGet('default.complex_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), (number, toString(number))) + WITH (number, toString(number)) as key + SELECT dictGet('default.complex_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictHas('default.complex_key_direct_dictionary', (number, toString(number))) + WITH (number, toString(number)) as key + SELECT dictHas('default.complex_key_direct_dictionary', key) FROM system.numbers LIMIT {elements_count} FORMAT Null; diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 8111084586a..92ed975a671 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -1,8 +1,4 @@ - - please_fix_me - - CREATE TABLE simple_key_flat_dictionary_source_table ( @@ -52,22 +48,25 @@ 5000000 7500000 - 10000000 - SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, rand64() % toUInt64(10000000)) + SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, rand64() % toUInt64({elements_count})) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictHas('default.simple_key_flat_dictionary', rand64() % toUInt64(10000000)) + SELECT * FROM simple_key_flat_dictionary FORMAT Null; + + + + SELECT dictHas('default.simple_key_flat_dictionary', rand64() % toUInt64(75000000)) FROM system.numbers - LIMIT {elements_count} + LIMIT 75000000 FORMAT Null; diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index a38d2f30c23..b83018c67df 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -81,35 +81,43 @@ elements_count - 2500000 5000000 7500000 - 10000000 - SELECT dictGet('default.simple_key_hashed_dictionary', {column_name}, number) + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_hashed_dictionary', {column_name}, key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictHas('default.simple_key_hashed_dictionary', number) + SELECT * FROM default.simple_key_hashed_dictionary; + + + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictHas('default.simple_key_hashed_dictionary', key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, (number, toString(number))) + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, key) FROM system.numbers LIMIT {elements_count} FORMAT Null; - SELECT dictHas('default.complex_key_hashed_dictionary', (number, toString(number))) + SELECT * FROM default.complex_key_hashed_dictionary; + + + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictHas('default.complex_key_hashed_dictionary', key) FROM system.numbers LIMIT {elements_count} FORMAT Null; From da3d3e906a202b7cb4e718f0380805f07bb3cd06 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 13 Apr 2021 21:13:04 +0300 Subject: [PATCH 5/7] Updated tests --- tests/performance/flat_dictionary.xml | 3 ++- tests/performance/hashed_dictionary.xml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 92ed975a671..56a94358eb9 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -60,7 +60,8 @@ - SELECT * FROM simple_key_flat_dictionary FORMAT Null; + SELECT * FROM simple_key_flat_dictionary + FORMAT Null; diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index b83018c67df..cd19ba035e5 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -113,7 +113,8 @@ FORMAT Null; - SELECT * FROM default.complex_key_hashed_dictionary; + SELECT * FROM default.complex_key_hashed_dictionary + FORMAT Null; WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key From 2c3abcaad12175b2545990e2f37515ba4c270523 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 14 Apr 2021 16:49:38 +0300 Subject: [PATCH 6/7] Updated test --- src/Dictionaries/DirectDictionary.cpp | 8 ++++++++ tests/performance/flat_dictionary.xml | 6 ++++-- tests/performance/hashed_dictionary.xml | 3 ++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index bacb1a87dc9..ed5da3eead0 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -51,6 +51,14 @@ Columns DirectDictionary::getColumns( key_to_fetched_index.reserve(requested_keys.size()); auto fetched_columns_from_storage = request.makeAttributesResultColumns(); + for (size_t attribute_index = 0; attribute_index < request.attributesSize(); ++attribute_index) + { + if (!request.shouldFillResultColumnWithIndex(attribute_index)) + continue; + + auto & fetched_column_from_storage = fetched_columns_from_storage[attribute_index]; + fetched_column_from_storage->reserve(requested_keys.size()); + } size_t fetched_key_index = 0; diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 56a94358eb9..a80631db541 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -53,7 +53,8 @@ - SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, rand64() % toUInt64({elements_count})) + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_flat_dictionary', {column_name}, key) FROM system.numbers LIMIT {elements_count} FORMAT Null; @@ -65,7 +66,8 @@ - SELECT dictHas('default.simple_key_flat_dictionary', rand64() % toUInt64(75000000)) + WITH rand64() % toUInt64(75000000) as key + SELECT dictHas('default.simple_key_flat_dictionary', key) FROM system.numbers LIMIT 75000000 FORMAT Null; diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index cd19ba035e5..5cbe1caeb23 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -95,7 +95,8 @@ FORMAT Null; - SELECT * FROM default.simple_key_hashed_dictionary; + SELECT * FROM default.simple_key_hashed_dictionary + FORMAT Null; WITH rand64() % toUInt64({elements_count}) as key From b8a1ead3e9899ff4cbda7f8866bc7e6ff4323496 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 15 Apr 2021 13:51:40 +0300 Subject: [PATCH 7/7] Updated hashed_dictionary test --- tests/performance/hashed_dictionary.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index 5cbe1caeb23..26164b4f888 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -94,10 +94,6 @@ LIMIT {elements_count} FORMAT Null; - - SELECT * FROM default.simple_key_hashed_dictionary - FORMAT Null; - WITH rand64() % toUInt64({elements_count}) as key SELECT dictHas('default.simple_key_hashed_dictionary', key) @@ -113,10 +109,6 @@ LIMIT {elements_count} FORMAT Null; - - SELECT * FROM default.complex_key_hashed_dictionary - FORMAT Null; - WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key SELECT dictHas('default.complex_key_hashed_dictionary', key)