Merge pull request #6447 from yandex/fix_complex_key_cached_string_dict

Fix bug with memory allocation for string fields in complex key cache dictionary
alexey-milovidov 2019-08-12 22:30:39 +03:00 committed by GitHub
commit f992df8cc2
6 changed files with 152 additions and 3 deletions


@@ -63,9 +63,9 @@ void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const s
     const auto str_size = string.size();
     if (str_size != 0)
     {
-        auto string_ptr = string_arena->alloc(str_size + 1);
-        std::copy(string.data(), string.data() + str_size + 1, string_ptr);
-        string_ref = StringRef{string_ptr, str_size};
+        auto str_ptr = string_arena->alloc(str_size);
+        std::copy(string.data(), string.data() + str_size, str_ptr);
+        string_ref = StringRef{str_ptr, str_size};
     }
     else
         string_ref = {};
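
Why the extra byte mattered (this note and the snippet are illustrative additions, not part of the commit): the cache keeps string values in an arena that recycles freed blocks by size, and the size later handed back to the arena is the one recorded in string_ref, i.e. str_size — an assumption about the release path, which is not shown in this hunk. With alloc(str_size + 1), allocation and release sizes never match, so a freed block lands in a different size bucket than the next request and the arena keeps growing. The hypothetical ToyFreeListArena below is a minimal sketch of that effect, not ClickHouse's arena:

#include <cstddef>
#include <cstdio>
#include <map>
#include <vector>

/// Hypothetical free-list arena keyed by size class; NOT the ClickHouse implementation.
struct ToyFreeListArena
{
    std::map<size_t, std::vector<char *>> free_lists;  /// size class -> blocks available for reuse
    size_t total_allocated = 0;                        /// grows only when no reusable block exists

    static size_t sizeClass(size_t n)
    {
        size_t c = 8;
        while (c < n)
            c *= 2;
        return c;
    }

    char * alloc(size_t n)
    {
        auto & list = free_lists[sizeClass(n)];
        if (!list.empty())
        {
            char * ptr = list.back();
            list.pop_back();
            return ptr;
        }
        total_allocated += sizeClass(n);
        return new char[sizeClass(n)];  /// blocks stay owned by the arena for reuse in this toy
    }

    void free(char * ptr, size_t n) { free_lists[sizeClass(n)].push_back(ptr); }
};

int main()
{
    const size_t str_size = 16;  /// same power-of-two string lengths the test inserts

    ToyFreeListArena buggy;
    for (int i = 0; i < 1000; ++i)
    {
        char * ptr = buggy.alloc(str_size + 1);  /// rounds up to the 32-byte class
        buggy.free(ptr, str_size);               /// returned to the 16-byte class, never reused
    }

    ToyFreeListArena fixed;
    for (int i = 0; i < 1000; ++i)
    {
        char * ptr = fixed.alloc(str_size);  /// allocation and release sizes match
        fixed.free(ptr, str_size);           /// the same block is recycled every iteration
    }

    std::printf("mismatched sizes: %zu bytes\n", buggy.total_allocated);  /// keeps growing (32000)
    std::printf("matched sizes:    %zu bytes\n", fixed.total_allocated);  /// stays at 16
}

With matched sizes a single block is recycled on every pass, which is the behavior the integration test below verifies through bytes_allocated in system.dictionaries.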


@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<yandex>
    <logger>
        <level>trace</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>1000M</size>
        <count>10</count>
    </logger>

    <tcp_port>9000</tcp_port>
    <listen_host>127.0.0.1</listen_host>

    <openSSL>
        <client>
            <cacheSessions>true</cacheSessions>
            <verificationMode>none</verificationMode>
            <invalidCertificateHandler>
                <name>AcceptCertificateHandler</name>
            </invalidCertificateHandler>
        </client>
    </openSSL>

    <max_concurrent_queries>500</max_concurrent_queries>
    <mark_cache_size>5368709120</mark_cache_size>
    <path>./clickhouse/</path>
    <users_config>users.xml</users_config>
    <dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
</yandex>


@@ -0,0 +1,45 @@
<yandex>
    <dictionary>
        <name>radars</name>
        <source>
            <clickhouse>
                <host>localhost</host>
                <port>9000</port>
                <user>default</user>
                <password></password>
                <db>default</db>
                <table>radars_table</table>
            </clickhouse>
        </source>
        <structure>
            <key>
                <attribute>
                    <name>radar_id</name>
                    <type>String</type>
                    <hierarchical>False</hierarchical>
                    <injective>False</injective>
                </attribute>
            </key>
            <attribute>
                <name>radar_ip</name>
                <type>String</type>
                <null_value></null_value>
                <hierarchical>False</hierarchical>
                <injective>True</injective>
            </attribute>
            <attribute>
                <name>client_id</name>
                <type>String</type>
                <null_value></null_value>
                <hierarchical>False</hierarchical>
                <injective>True</injective>
            </attribute>
        </structure>
        <layout>
            <complex_key_cache>
                <size_in_cells>20</size_in_cells>
            </complex_key_cache>
        </layout>
        <lifetime>1</lifetime>
    </dictionary>
</yandex>


@@ -0,0 +1,23 @@
<?xml version="1.0"?>
<yandex>
    <profiles>
        <default>
        </default>
    </profiles>

    <users>
        <default>
            <password></password>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </default>
    </users>

    <quotas>
        <default>
        </default>
    </quotas>
</yandex>


@@ -0,0 +1,51 @@
import pytest
import os
import time
from helpers.cluster import ClickHouseCluster
import random

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
node = cluster.add_instance('node', main_configs=['configs/dictionaries/complex_key_cache_string.xml'])


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()
        node.query("create table radars_table (radar_id String, radar_ip String, client_id String) engine=MergeTree() order by radar_id")

        yield cluster
    finally:
        cluster.shutdown()


def test_memory_consumption(started_cluster):
    node.query("insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format('w' * 8))
    node.query("insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format('x' * 16))
    node.query("insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format('y' * 32))
    node.query("insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format('z' * 64))

    # Fill dictionary
    node.query("select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)")

    allocated_first = int(node.query("select bytes_allocated from system.dictionaries where name = 'radars'").strip())

    alloc_array = []
    for i in xrange(5):
        node.query("select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)")

        allocated = int(node.query("select bytes_allocated from system.dictionaries where name = 'radars'").strip())
        alloc_array.append(allocated)

    # size doesn't grow
    assert all(allocated_first >= a for a in alloc_array)

    for i in xrange(5):
        node.query("select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)")

        allocated = int(node.query("select bytes_allocated from system.dictionaries where name = 'radars'").strip())
        alloc_array.append(allocated)

    # size doesn't grow
    assert all(allocated_first >= a for a in alloc_array)