From d13a11b42101e152aaf6129812e40f3b16ca1a46 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 13 May 2020 17:14:05 +0300 Subject: [PATCH 1/2] fix backward compatibility with tuples and distributed --- src/Parsers/ASTLiteral.cpp | 41 ++++++++++++++-- .../__init__.py | 0 .../configs/remote_servers.xml | 18 +++++++ .../test.py | 47 +++++++++++++++++++ 4 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_distributed_backward_compatability/__init__.py create mode 100644 tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml create mode 100644 tests/integration/test_distributed_backward_compatability/test.py diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 92d57687426..184d361ae2e 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include namespace DB @@ -14,30 +16,59 @@ void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const applyVisitor(FieldVisitorHash(hash_state), value); } +/// Writes 'tuple' word before tuple literals for backward compatibility reasons. +/// TODO: remove, when versions lower than 20.3 will be rearely used. +class FieldVisitorToColumnName : public StaticVisitor +{ +public: + template + String operator() (const T & x) const { return visitor(x); } + +private: + FieldVisitorToString visitor; +}; + +template<> +String FieldVisitorToColumnName::operator() (const Tuple & x) const +{ + WriteBufferFromOwnString wb; + + wb << "tuple("; + for (auto it = x.begin(); it != x.end(); ++it) + { + if (it != x.begin()) + wb << ", "; + wb << applyVisitor(*this, *it); + } + wb << ')'; + + return wb.str(); +} + void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const { /// 100 - just arbitrary value. constexpr auto min_elements_for_hashing = 100; - /// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them. + /// Special case for very large arrays. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) + /// TODO: Also do hashing for large tuples, when versions lower than 20.3 will be rarely used, because it breaks backward compatibility. auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing) - || (type == Field::Types::Tuple && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); UInt64 low, high; hash.get128(low, high); - writeCString(type == Field::Types::Array ? "__array_" : "__tuple_", ostr); + writeCString("__array_", ostr); writeText(low, ostr); ostr.write('_'); writeText(high, ostr); } else { - String column_name = applyVisitor(FieldVisitorToString(), value); + String column_name = applyVisitor(FieldVisitorToColumnName(), value); writeString(column_name, ostr); } } diff --git a/tests/integration/test_distributed_backward_compatability/__init__.py b/tests/integration/test_distributed_backward_compatability/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml b/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml new file mode 100644 index 00000000000..998a969e87e --- /dev/null +++ b/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + \ No newline at end of file diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py new file mode 100644 index 00000000000..2b050f379a2 --- /dev/null +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -0,0 +1,47 @@ +import pytest +import time + +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) + +node_old = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], image='yandex/clickhouse-server:19.17.8.54', stay_alive=True, with_installed_binary=True) +node_new = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml']) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node_old, node_new): + node.query("CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id") + + node_old.query("INSERT INTO local_table VALUES (1, 'node1')") + node_new.query("INSERT INTO local_table VALUES (2, 'node2')") + + node_old.query("CREATE TABLE distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table)") + node_new.query("CREATE TABLE distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table)") + + yield cluster + + finally: + cluster.shutdown() + +def test_distributed_in_tuple(started_cluster): + query1 = "SELECT count() FROM distributed WHERE (id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))" + query2 = "SELECT sum((id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))) FROM distributed" + assert node_old.query(query1) == "1\n" + assert node_old.query(query2) == "1\n" + assert node_new.query(query1) == "1\n" + assert node_new.query(query2) == "1\n" + + large_set = '(' + ','.join([str(i) for i in range(1000)]) + ')' + query3 = "SELECT count() FROM distributed WHERE id IN " + large_set + query4 = "SELECT sum(id IN {}) FROM distributed".format(large_set) + assert node_old.query(query3) == "2\n" + assert node_old.query(query4) == "2\n" + assert node_new.query(query3) == "2\n" + assert node_new.query(query4) == "2\n" \ No newline at end of file From 4c0cbc78a992d85ee4f7dd6041143284bd1f13f8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 May 2020 23:44:52 +0300 Subject: [PATCH 2/2] trigger CI --- .../configs/remote_servers.xml | 2 +- .../integration/test_distributed_backward_compatability/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml b/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml index 998a969e87e..ebce4697529 100644 --- a/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml +++ b/tests/integration/test_distributed_backward_compatability/configs/remote_servers.xml @@ -15,4 +15,4 @@ - \ No newline at end of file + diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index 2b050f379a2..a0362cea49e 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -44,4 +44,4 @@ def test_distributed_in_tuple(started_cluster): assert node_old.query(query3) == "2\n" assert node_old.query(query4) == "2\n" assert node_new.query(query3) == "2\n" - assert node_new.query(query4) == "2\n" \ No newline at end of file + assert node_new.query(query4) == "2\n"