mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Added performance tests
This commit is contained in:
parent
ed295a934c
commit
5e03418651
@ -6,7 +6,9 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Interpreters/AggregationCommon.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -259,24 +261,21 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
|
||||
Arena complex_key_arena;
|
||||
|
||||
const DictionaryAttribute & attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
auto result = attribute.type->createColumn();
|
||||
|
||||
DefaultValueProvider default_value_provider(attribute.null_value, default_values_column);
|
||||
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, complex_key_arena);
|
||||
const auto & requested_keys = extractor.getKeys();
|
||||
|
||||
HashMap<KeyType, size_t> key_to_fetched_index;
|
||||
key_to_fetched_index.reserve(requested_keys.size());
|
||||
|
||||
auto fetched_from_storage = attribute.type->createColumn();
|
||||
size_t fetched_key_index = 0;
|
||||
size_t requested_attribute_index = attribute_index_by_name.find(attribute_name)->second;
|
||||
|
||||
Columns block_key_columns;
|
||||
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
|
||||
size_t requested_key_index = 0;
|
||||
Field block_column_value;
|
||||
|
||||
/** In result stream keys are returned in same order as they were requested.
|
||||
* For example if we request keys [1, 2, 3, 4] but source has only [2, 3] we need to return to client
|
||||
* [default_value, 2, 3, default_value].
|
||||
* For each key fetched from source current algorithm adds default values until
|
||||
* requested key with requested_key_index match key fetched from source.
|
||||
* At the end we also need to process tail.
|
||||
*/
|
||||
block_key_columns.reserve(dictionary_keys_size);
|
||||
|
||||
BlockInputStreamPtr stream = getSourceBlockInputStream(key_columns, requested_keys);
|
||||
|
||||
@ -284,9 +283,6 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
|
||||
|
||||
while (const auto block = stream->read())
|
||||
{
|
||||
Columns block_key_columns;
|
||||
block_key_columns.reserve(dictionary_keys_size);
|
||||
|
||||
auto block_columns = block.getColumns();
|
||||
|
||||
/// Split into keys columns and attribute columns
|
||||
@ -301,35 +297,39 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
|
||||
size_t block_keys_size = block_keys.size();
|
||||
|
||||
const auto & block_column = block.safeGetByPosition(dictionary_keys_size + requested_attribute_index).column;
|
||||
fetched_from_storage->insertRangeFrom(*block_column, 0, block_keys_size);
|
||||
|
||||
for (size_t block_key_index = 0; block_key_index < block_keys_size; ++block_key_index)
|
||||
{
|
||||
auto block_key = block_keys[block_key_index];
|
||||
const auto & block_key = block_keys[block_key_index];
|
||||
|
||||
while (requested_key_index < requested_keys.size() &&
|
||||
block_key != requested_keys[requested_key_index])
|
||||
{
|
||||
block_column_value = default_value_provider.getDefaultValue(requested_key_index);
|
||||
result->insert(block_column_value);
|
||||
++requested_key_index;
|
||||
}
|
||||
|
||||
block_column->get(block_key_index, block_column_value);
|
||||
result->insert(block_column_value);
|
||||
++requested_key_index;
|
||||
key_to_fetched_index[block_key] = fetched_key_index;
|
||||
++fetched_key_index;
|
||||
}
|
||||
|
||||
block_key_columns.clear();
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
size_t requested_keys_size = requested_keys.size();
|
||||
Field value_to_insert;
|
||||
|
||||
Field default_value;
|
||||
/// Process the tail: if the source returned fewer keys than were requested, insert default values for the remaining requested keys
|
||||
for (; requested_key_index < requested_keys_size; ++requested_key_index)
|
||||
size_t requested_keys_size = requested_keys.size();
|
||||
auto result = fetched_from_storage->cloneEmpty();
|
||||
result->reserve(requested_keys_size);
|
||||
|
||||
|
||||
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
|
||||
{
|
||||
default_value = default_value_provider.getDefaultValue(requested_key_index);
|
||||
result->insert(default_value);
|
||||
const auto requested_key = requested_keys[requested_key_index];
|
||||
const auto * it = key_to_fetched_index.find(requested_key);
|
||||
|
||||
if (it)
|
||||
fetched_from_storage->get(it->getMapped(), value_to_insert);
|
||||
else
|
||||
value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
|
||||
|
||||
result->insert(value_to_insert);
|
||||
}
|
||||
|
||||
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
|
||||
@ -349,17 +349,21 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
|
||||
const auto & requested_keys = requested_keys_extractor.getKeys();
|
||||
size_t requested_keys_size = requested_keys.size();
|
||||
|
||||
HashMap<KeyType, size_t> requested_key_to_index;
|
||||
requested_key_to_index.reserve(requested_keys_size);
|
||||
|
||||
for (size_t i = 0; i < requested_keys.size(); ++i)
|
||||
{
|
||||
auto requested_key = requested_keys[i];
|
||||
requested_key_to_index[requested_key] = i;
|
||||
}
|
||||
|
||||
auto result = ColumnUInt8::create(requested_keys_size, false);
|
||||
auto & result_data = result->getData();
|
||||
|
||||
Columns block_key_columns;
|
||||
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
|
||||
size_t requested_key_index = 0;
|
||||
Field block_column_value;
|
||||
|
||||
/** Algorithm is the same as in getColumn method. There are only 2 details
|
||||
* 1. We do not process the tail because the result column is created with false as the default value.
|
||||
* 2. If requested key does not match key from source we set false in requested_key_index.
|
||||
*/
|
||||
block_key_columns.reserve(dictionary_keys_size);
|
||||
|
||||
BlockInputStreamPtr stream = getSourceBlockInputStream(key_columns, requested_keys);
|
||||
|
||||
@ -369,9 +373,6 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
|
||||
{
|
||||
auto block_columns = block.getColumns();
|
||||
|
||||
Columns block_key_columns;
|
||||
block_key_columns.reserve(dictionary_keys_size);
|
||||
|
||||
/// Split into keys columns and attribute columns
|
||||
for (size_t i = 0; i < dictionary_keys_size; ++i)
|
||||
{
|
||||
@ -381,28 +382,21 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
|
||||
|
||||
DictionaryKeysExtractor<dictionary_key_type> block_keys_extractor(block_key_columns, complex_key_arena);
|
||||
const auto & block_keys = block_keys_extractor.getKeys();
|
||||
size_t block_keys_size = block_keys.size();
|
||||
|
||||
for (size_t block_key_index = 0; block_key_index < block_keys_size; ++block_key_index)
|
||||
for (const auto & block_key : block_keys)
|
||||
{
|
||||
auto block_key = block_keys[block_key_index];
|
||||
const auto * it = requested_key_to_index.find(block_key);
|
||||
assert(it);
|
||||
|
||||
while (requested_key_index < requested_keys.size() &&
|
||||
block_key != requested_keys[requested_key_index])
|
||||
{
|
||||
result_data[requested_key_index] = false;
|
||||
++requested_key_index;
|
||||
}
|
||||
|
||||
result_data[requested_key_index] = true;
|
||||
++requested_key_index;
|
||||
size_t result_data_found_index = it->getMapped();
|
||||
result_data[result_data_found_index] = true;
|
||||
}
|
||||
|
||||
block_key_columns.clear();
|
||||
}
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
/// No additional code is needed for the tail because the result was initialized with false values
|
||||
|
||||
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
|
78
tests/performance/direct_dictionary.xml
Normal file
78
tests/performance/direct_dictionary.xml
Normal file
@ -0,0 +1,78 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE simple_direct_dictionary_test_table
|
||||
(
|
||||
id UInt64,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
) ENGINE = TinyLog;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
INSERT INTO simple_direct_dictionary_test_table
|
||||
SELECT number, number, toString(number), toDecimal64(number, 8), toString(number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY simple_direct_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_direct_dictionary_test_table'))
|
||||
LAYOUT(DIRECT())
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE complex_direct_dictionary_test_table
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
) ENGINE = TinyLog;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
INSERT INTO complex_direct_dictionary_test_table
|
||||
SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY complex_direct_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
)
|
||||
PRIMARY KEY id, id_key
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_direct_dictionary_test_table'))
|
||||
LAYOUT(COMPLEX_KEY_DIRECT())
|
||||
</create_query>
|
||||
|
||||
<query>SELECT dictGet('default.simple_direct_dictionary', 'value_int', number) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.simple_direct_dictionary', 'value_string', number) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.simple_direct_dictionary', 'value_decimal', number) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.simple_direct_dictionary', 'value_string_nullable', number) FROM system.numbers LIMIT 150000;</query>
|
||||
|
||||
<query>SELECT dictGet('default.complex_direct_dictionary', 'value_int', (number, toString(number))) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.complex_direct_dictionary', 'value_string', (number, toString(number))) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.complex_direct_dictionary', 'value_decimal', (number, toString(number))) FROM system.numbers LIMIT 150000;</query>
|
||||
<query>SELECT dictGet('default.complex_direct_dictionary', 'value_string_nullable', (number, toString(number))) FROM system.numbers LIMIT 150000;</query>
|
||||
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user