From 49d683c34ce0ffed07bfebaaf49dc4a49350d5fc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 6 Sep 2019 17:58:23 +0300 Subject: [PATCH 001/215] Auto version update to [19.14.2.1] [54425] --- dbms/cmake/version.cmake | 8 ++++---- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 332fa51ab26..e13c8f66311 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 1) -set(VERSION_GITHASH adfc36917222bdb03eba069f0cad0f4f5b8f1c94) -set(VERSION_DESCRIBE v19.14.1.1-prestable) -set(VERSION_STRING 19.14.1.1) +set(VERSION_PATCH 2) +set(VERSION_GITHASH 6f1a8c37abe6ee4e7ee74c0b5cb9c05a87417b61) +set(VERSION_DESCRIBE v19.14.2.1-prestable) +set(VERSION_STRING 19.14.2.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index f1db1b81185..bab2c62d8df 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.13.1.1) unstable; urgency=low +clickhouse (19.14.2.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 23 Jul 2019 11:20:49 +0300 + -- clickhouse-release Fri, 06 Sep 2019 17:58:20 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 68cdf3f0204..cb28c075540 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.13.1.* +ARG version=19.14.2.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 934c1921a67..53c336d82c0 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM 
ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.13.1.* +ARG version=19.14.2.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 5c2bd25b48c..9f26a36b913 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.13.1.* +ARG version=19.14.2.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 9ed197c24f176a2ed820768d8e4cb8162d2fb234 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Mon, 9 Sep 2019 19:59:51 +0300 Subject: [PATCH 002/215] Store offsets manually for each message (#6872) (cherry picked from commit 6c32fc3fc11a27a09b174f1d7b4dd8550b79e918) --- dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 7 ++++--- dbms/src/Storages/Kafka/StorageKafka.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 4614e581a3c..823eb632b7f 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -72,9 +72,7 @@ void ReadBufferFromKafkaConsumer::commit() PrintOffsets("Polled offset", consumer->get_offsets_position(consumer->get_assignment())); - /// Since we can poll more messages than we already processed - commit only processed messages. 
- if (!messages.empty()) - consumer->async_commit(*std::prev(current)); + consumer->async_commit(); PrintOffsets("Committed offset", consumer->get_offsets_committed(consumer->get_assignment())); @@ -186,6 +184,9 @@ bool ReadBufferFromKafkaConsumer::nextImpl() auto new_position = reinterpret_cast(const_cast(current->get_payload().get_data())); BufferBase::set(new_position, current->get_payload().get_size(), 0); + /// Since we can poll more messages than we already processed - commit only processed messages. + consumer->store_offset(*current); + ++current; return true; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 2b41fa9e772..ed067993a18 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -261,9 +261,10 @@ ConsumerBufferPtr StorageKafka::createReadBuffer() conf.set("metadata.broker.list", brokers); conf.set("group.id", group); conf.set("client.id", VERSION_FULL); - conf.set("auto.offset.reset", "smallest"); // If no offset stored for this group, read all messages from the start - conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished - conf.set("enable.partition.eof", "false"); // Ignore EOF messages + conf.set("auto.offset.reset", "smallest"); // If no offset stored for this group, read all messages from the start + conf.set("enable.auto.commit", "false"); // We manually commit offsets after a stream successfully finished + conf.set("enable.auto.offset.store", "false"); // Store offsets manually (via store_offset) - to commit them all at once.
+ conf.set("enable.partition.eof", "false"); // Ignore EOF messages updateConfiguration(conf); // Create a consumer and subscribe to topics From cf97326931e79f7ff7af7b4c37f6bc0e6c12d3ff Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2019 13:32:19 +0300 Subject: [PATCH 003/215] Auto version update to [19.14.2.2] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index e13c8f66311..b99fd1eac6a 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 2) -set(VERSION_GITHASH 6f1a8c37abe6ee4e7ee74c0b5cb9c05a87417b61) -set(VERSION_DESCRIBE v19.14.2.1-prestable) -set(VERSION_STRING 19.14.2.1) +set(VERSION_GITHASH 9ed197c24f176a2ed820768d8e4cb8162d2fb234) +set(VERSION_DESCRIBE v19.14.2.2-prestable) +set(VERSION_STRING 19.14.2.2) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 94788f7702248692d796aaf08b38376707aae74f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2019 13:32:37 +0300 Subject: [PATCH 004/215] Auto version update to [19.14.3.1] [54425] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index b99fd1eac6a..d0c543f25c7 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 2) +set(VERSION_PATCH 3) set(VERSION_GITHASH 9ed197c24f176a2ed820768d8e4cb8162d2fb234) -set(VERSION_DESCRIBE v19.14.2.2-prestable) -set(VERSION_STRING 19.14.2.2) +set(VERSION_DESCRIBE v19.14.3.1-prestable) +set(VERSION_STRING 19.14.3.1) # end of autochange set(VERSION_EXTRA "" 
CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index bab2c62d8df..9ac29181056 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.14.2.1) unstable; urgency=low +clickhouse (19.14.3.1) unstable; urgency=low * Modified source code - -- clickhouse-release Fri, 06 Sep 2019 17:58:20 +0300 + -- clickhouse-release Tue, 10 Sep 2019 13:32:34 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index cb28c075540..30b09d7645e 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.2.* +ARG version=19.14.3.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 53c336d82c0..5a2abec6245 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.2.* +ARG version=19.14.3.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 9f26a36b913..16e23213084 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.2.* +ARG version=19.14.3.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From f38d2483f8f0e72f4d8cca0a2ad612de6318b941 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2019 20:49:19 +0300 Subject: [PATCH 005/215] Auto version update to [19.14.3.3] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index d0c543f25c7..bef2500f0e0 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ 
set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 3) -set(VERSION_GITHASH 9ed197c24f176a2ed820768d8e4cb8162d2fb234) -set(VERSION_DESCRIBE v19.14.3.1-prestable) -set(VERSION_STRING 19.14.3.1) +set(VERSION_GITHASH 94788f7702248692d796aaf08b38376707aae74f) +set(VERSION_DESCRIBE v19.14.3.3-stable) +set(VERSION_STRING 19.14.3.3) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 7797dee7f1051de3fe2b8791ec944c7ebb22e9c4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2019 20:49:38 +0300 Subject: [PATCH 006/215] Auto version update to [19.14.4.1] [54425] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index bef2500f0e0..15f6373ba3f 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 3) +set(VERSION_PATCH 4) set(VERSION_GITHASH 94788f7702248692d796aaf08b38376707aae74f) -set(VERSION_DESCRIBE v19.14.3.3-stable) -set(VERSION_STRING 19.14.3.3) +set(VERSION_DESCRIBE v19.14.4.1-stable) +set(VERSION_STRING 19.14.4.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index 9ac29181056..55716635681 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.14.3.1) unstable; urgency=low +clickhouse (19.14.4.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 10 Sep 2019 13:32:34 +0300 + -- clickhouse-release Tue, 10 Sep 2019 20:49:34 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 30b09d7645e..04904134763 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb 
http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.3.* +ARG version=19.14.4.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5a2abec6245..62eb5f41e8e 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.3.* +ARG version=19.14.4.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 16e23213084..49799749fbd 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.3.* +ARG version=19.14.4.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 9f15ebf3fdfc73f116055409036a762b5d1fb7df Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 12 Sep 2019 03:20:38 +0300 Subject: [PATCH 007/215] Merge pull request #6909 from abyss7/issue-6902 Put delimiter only after consuming a message (cherry picked from commit cb2f4ebfaff95fbb0fb55f332e1d38867f0224c5) --- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 3 +-- dbms/tests/integration/test_storage_kafka/test.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 823eb632b7f..083b471d4f1 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -150,8 +150,6 @@ bool ReadBufferFromKafkaConsumer::nextImpl() return true; } - put_delimiter = (delimiter != 0); - if (current == messages.end()) { if (intermediate_commit) @@ -183,6 +181,7 @@ bool ReadBufferFromKafkaConsumer::nextImpl() // XXX: very fishy place with const casting. 
auto new_position = reinterpret_cast(const_cast(current->get_payload().get_data())); BufferBase::set(new_position, current->get_payload().get_size(), 0); + put_delimiter = (delimiter != 0); /// Since we can poll more messages than we already processed - commit only processed messages. consumer->store_offset(*current); diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index a3aa290c98d..c629ac9f22e 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -248,6 +248,21 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_check_result(result, True) +@pytest.mark.timeout(180) +def test_kafka_select_empty(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'empty', + kafka_group_name = 'empty', + kafka_format = 'TSV', + kafka_row_delimiter = '\\n'; + ''') + + assert int(instance.query('SELECT count() FROM test.kafka')) == 0 + + @pytest.mark.timeout(180) def test_kafka_json_without_delimiter(kafka_cluster): instance.query(''' From da44d9fd9beaae81c9c98e1f976d306c23fb8eaf Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Sep 2019 03:01:42 +0300 Subject: [PATCH 008/215] Merge pull request #6853 from yandex/pipe-capacity-very-old-kernels Returned support for very old Linux kernels (that lack of F_GETPIPE_SZ fcntl) (cherry picked from commit 46e7b4f925f4715cf73220c4935ade845b13fa2b) --- dbms/src/Common/TraceCollector.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/TraceCollector.cpp b/dbms/src/Common/TraceCollector.cpp index 6ed2074e47d..9451c3f88e5 100644 --- a/dbms/src/Common/TraceCollector.cpp +++ b/dbms/src/Common/TraceCollector.cpp @@ -49,15 +49,26 @@ TraceCollector::TraceCollector(std::shared_ptr & trace_log_) #if !defined(__FreeBSD__) /** Increase pipe 
size to avoid slowdown during fine-grained trace collection. */ - constexpr int max_pipe_capacity_to_set = 1048576; int pipe_size = fcntl(trace_pipe.fds_rw[1], F_GETPIPE_SZ); if (-1 == pipe_size) - throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL); - for (errno = 0; errno != EPERM && pipe_size < max_pipe_capacity_to_set; pipe_size *= 2) - if (-1 == fcntl(trace_pipe.fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM) - throwFromErrno("Cannot increase pipe capacity to " + toString(pipe_size * 2), ErrorCodes::CANNOT_FCNTL); + { + if (errno == EINVAL) + { + LOG_INFO(log, "Cannot get pipe capacity, " << errnoToString(ErrorCodes::CANNOT_FCNTL) << ". Very old Linux kernels have no support for this fcntl."); + /// It will work nevertheless. + } + else + throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL); + } + else + { + constexpr int max_pipe_capacity_to_set = 1048576; + for (errno = 0; errno != EPERM && pipe_size < max_pipe_capacity_to_set; pipe_size *= 2) + if (-1 == fcntl(trace_pipe.fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM) + throwFromErrno("Cannot increase pipe capacity to " + toString(pipe_size * 2), ErrorCodes::CANNOT_FCNTL); - LOG_TRACE(log, "Pipe capacity is " << formatReadableSizeWithBinarySuffix(std::min(pipe_size, max_pipe_capacity_to_set))); + LOG_TRACE(log, "Pipe capacity is " << formatReadableSizeWithBinarySuffix(std::min(pipe_size, max_pipe_capacity_to_set))); + } #endif thread = ThreadFromGlobalPool(&TraceCollector::run, this); From 1602daba5937311a5d70a45dbae78435da9a8200 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 12 Sep 2019 00:36:59 +0300 Subject: [PATCH 009/215] Merge pull request #6911 from yandex/fix-insert-select-data-loss Fix insert select data loss (cherry picked from commit 6fbf9ca7ab88f02b39328f0c5cc83ee8c5cfdfd6) --- dbms/src/DataStreams/copyData.cpp | 2 -- dbms/src/Interpreters/executeQuery.cpp | 15 ++++++++++++++- .../01009_insert_select_data_loss.reference | 2 ++ 
.../0_stateless/01009_insert_select_data_loss.sql | 5 +++++ 4 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01009_insert_select_data_loss.reference create mode 100644 dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql diff --git a/dbms/src/DataStreams/copyData.cpp b/dbms/src/DataStreams/copyData.cpp index 5000c87be7c..9d17596fc8d 100644 --- a/dbms/src/DataStreams/copyData.cpp +++ b/dbms/src/DataStreams/copyData.cpp @@ -28,8 +28,6 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCall break; to.write(block); - if (!block.rows()) - to.flush(); progress(block); } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 3793f2f79c9..85130437155 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -35,6 +35,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -631,7 +632,19 @@ void executeQuery( if (set_query_id) set_query_id(context.getClientInfo().current_query_id); - copyData(*streams.in, *out); + if (ast->as()) + { + /// For Watch query, flush data if block is empty (to send data to client). 
+ auto flush_callback = [&out](const Block & block) + { + if (block.rows() == 0) + out->flush(); + }; + + copyData(*streams.in, *out, [](){ return false; }, std::move(flush_callback)); + } + else + copyData(*streams.in, *out); } if (pipeline.initialized()) diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.reference b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.reference new file mode 100644 index 00000000000..25e7f55667e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.reference @@ -0,0 +1,2 @@ +0 +10 diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql new file mode 100644 index 00000000000..9a754d94323 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql @@ -0,0 +1,5 @@ +drop table if exists tab; +create table tab (x UInt64) engine = MergeTree order by tuple(); + +insert into tab select number as n from numbers(20) any inner join (select number * 10 as n from numbers(2)) using(n) settings any_join_distinct_right_table_keys = 1, max_block_size = 5; +select * from tab order by x; From 242bdeda5ff371b46e93105ae6460d0d6a8783f2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 6 Sep 2019 20:15:14 +0300 Subject: [PATCH 010/215] Merge pull request #6837 from filimonov/fix_some_pvs_reported_issues Fix some pvs reported issues (cherry picked from commit c90dfc1e48f83de8880f2c2e73dea20ab9618598) --- dbms/src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 4 ++-- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp index dc0d3ef27b1..989595b3647 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp +++ 
b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -30,7 +30,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin /// Set as unchanged to avoid sending to remote server. new_settings.max_concurrent_queries_for_user.changed = false; new_settings.max_memory_usage_for_user.changed = false; - new_settings.max_memory_usage_for_all_queries = false; + new_settings.max_memory_usage_for_all_queries.changed = false; Context new_context(context); new_context.setSettings(new_settings); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 53b652681e2..f0ec292f647 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1222,8 +1222,8 @@ void Context::setCurrentQueryId(const String & query_id) } words; } random; - random.words.a = thread_local_rng(); - random.words.b = thread_local_rng(); + random.words.a = thread_local_rng(); //-V656 + random.words.b = thread_local_rng(); //-V656 /// Use protected constructor. struct qUUID : Poco::UUID diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 05ac99196a4..a795fac596d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -418,8 +418,6 @@ QueryPipeline InterpreterSelectQuery::executeWithProcessors() Block InterpreterSelectQuery::getSampleBlockImpl() { - FilterInfoPtr filter_info; - /// Need to create sets before analyzeExpressions(). Otherwise some sets for index won't be created. 
query_analyzer->makeSetsForIndex(getSelectQuery().where()); query_analyzer->makeSetsForIndex(getSelectQuery().prewhere()); @@ -431,8 +429,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() options.to_stage, context, storage, - true, - filter_info); + true, // only_types + {} // filter_info + ); if (options.to_stage == QueryProcessingStage::Enum::FetchColumns) { From d341531cb1c3a507dc9d5a0fada2648cf38a595a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 11 Sep 2019 02:03:20 +0300 Subject: [PATCH 011/215] Merge pull request #6882 from clemrodriguez/issue-6825 Fix too early MySQL connection closure in MySQLBlockInputStream.cpp (cherry picked from commit a446ec5e61ff068327a963030f9a2e3b315b762d) --- dbms/src/Formats/MySQLBlockInputStream.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Formats/MySQLBlockInputStream.cpp b/dbms/src/Formats/MySQLBlockInputStream.cpp index dcf609f62ce..4f8291ffebe 100644 --- a/dbms/src/Formats/MySQLBlockInputStream.cpp +++ b/dbms/src/Formats/MySQLBlockInputStream.cpp @@ -131,8 +131,6 @@ Block MySQLBlockInputStream::readImpl() row = result.fetch(); } - if (auto_close) - entry.disconnect(); return description.sample_block.cloneWithColumns(std::move(columns)); } From 60cc8fb29f15a58604eec0168ef2dd5bb531f9aa Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Mon, 9 Sep 2019 09:43:28 +0300 Subject: [PATCH 012/215] Merge pull request #6865 from yurriy/mysql Releasing resources after query execution in MySQL compatibility server (cherry picked from commit f6e1a26806baeb55cdd2033fb5429128cac1158c) --- dbms/programs/server/MySQLHandler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 3f16496b5be..7e555220489 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -293,7 +293,8 @@ void MySQLHandler::comQuery(ReadBuffer & payload) should_replace = true; } -
executeQuery(should_replace ? empty_select : payload, *out, true, connection_context, set_content_type, nullptr); + Context query_context = connection_context; + executeQuery(should_replace ? empty_select : payload, *out, true, query_context, set_content_type, nullptr); if (!with_output) packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); From 5e106fe1a91981fbf6e6a231b3ef8b5478d192db Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 14 Sep 2019 01:43:49 +0300 Subject: [PATCH 013/215] Auto version update to [19.14.4.9] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 15f6373ba3f..aa4add51b2c 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 4) -set(VERSION_GITHASH 94788f7702248692d796aaf08b38376707aae74f) -set(VERSION_DESCRIBE v19.14.4.1-stable) -set(VERSION_STRING 19.14.4.1) +set(VERSION_GITHASH 60cc8fb29f15a58604eec0168ef2dd5bb531f9aa) +set(VERSION_DESCRIBE v19.14.4.9-prestable) +set(VERSION_STRING 19.14.4.9) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From c6464f2813c7d5cb939af11759d05705ba0f15ee Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 14 Sep 2019 01:44:07 +0300 Subject: [PATCH 014/215] Auto version update to [19.14.5.1] [54425] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index aa4add51b2c..af3c12512a6 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 4) +set(VERSION_PATCH 5) set(VERSION_GITHASH 
60cc8fb29f15a58604eec0168ef2dd5bb531f9aa) -set(VERSION_DESCRIBE v19.14.4.9-prestable) -set(VERSION_STRING 19.14.4.9) +set(VERSION_DESCRIBE v19.14.5.1-prestable) +set(VERSION_STRING 19.14.5.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index 55716635681..f2fcc1240d4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.14.4.1) unstable; urgency=low +clickhouse (19.14.5.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 10 Sep 2019 20:49:34 +0300 + -- clickhouse-release Sat, 14 Sep 2019 01:44:03 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 04904134763..872ba169315 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.4.* +ARG version=19.14.5.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 62eb5f41e8e..6ca1b69bb5e 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.4.* +ARG version=19.14.5.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 49799749fbd..cabad4e1c1f 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.4.* +ARG version=19.14.5.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From c5a328cbfa1ea124e1a59c3e76f8fc6f85a1fcbe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 18 Sep 2019 17:53:35 +0300 Subject: [PATCH 015/215] Auto version update to [19.14.5.10] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index af3c12512a6..1663310e563 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 5) -set(VERSION_GITHASH 60cc8fb29f15a58604eec0168ef2dd5bb531f9aa) -set(VERSION_DESCRIBE v19.14.5.1-prestable) -set(VERSION_STRING 19.14.5.1) +set(VERSION_GITHASH c6464f2813c7d5cb939af11759d05705ba0f15ee) +set(VERSION_DESCRIBE v19.14.5.10-prestable) +set(VERSION_STRING 19.14.5.10) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 33c50a7328456b7d1a1d8e262e5ce34f14fce960 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 18 Sep 2019 17:53:53 +0300 Subject: [PATCH 016/215] Auto version update to [19.14.6.1] [54425] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 1663310e563..565f2157390 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 5) +set(VERSION_PATCH 6) set(VERSION_GITHASH c6464f2813c7d5cb939af11759d05705ba0f15ee) -set(VERSION_DESCRIBE v19.14.5.10-prestable) -set(VERSION_STRING 19.14.5.10) +set(VERSION_DESCRIBE v19.14.6.1-prestable) +set(VERSION_STRING 19.14.6.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index f2fcc1240d4..4d6d8be0b76 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.14.5.1) unstable; urgency=low +clickhouse (19.14.6.1) unstable; urgency=low * Modified source code - -- clickhouse-release Sat, 14 Sep 2019 01:44:03 +0300 + -- clickhouse-release Wed, 18 Sep 2019 
17:53:49 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 872ba169315..771e6405414 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.5.* +ARG version=19.14.6.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 6ca1b69bb5e..0337faa86ff 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.5.* +ARG version=19.14.6.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index cabad4e1c1f..62cc9c0b958 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.5.* +ARG version=19.14.6.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 6beb8a716aa1db08f5992a7b5924a2d8834bb60d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 Sep 2019 17:41:49 +0300 Subject: [PATCH 017/215] Fix system contributors generating script --- dbms/src/Storages/System/StorageSystemContributors.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/StorageSystemContributors.sh b/dbms/src/Storages/System/StorageSystemContributors.sh index aea122df0dc..c4c4eb5ad30 100755 --- a/dbms/src/Storages/System/StorageSystemContributors.sh +++ b/dbms/src/Storages/System/StorageSystemContributors.sh @@ -2,11 +2,15 @@ set -x +# doesn't actually cd to directory, but return absolute path CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# cd to directory +cd $CUR_DIR CONTRIBUTORS_FILE=${CONTRIBUTORS_FILE=$CUR_DIR/StorageSystemContributors.generated.cpp} -git 
shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/ "$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp +# if you don't specify HEAD here, without terminal `git shortlog` would expect input from stdin +git shortlog HEAD --summary | perl -lnE 's/^\s+\d+\s+(.+)/ "$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp # If git history not available - dont make target file if [ ! -s $CONTRIBUTORS_FILE.tmp ]; then From b946d2770e48261a39a2c7dfa2971093cc38f929 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Sep 2019 01:54:27 +0300 Subject: [PATCH 018/215] Merge pull request #6977 from Akazz/minor_fixes_in_tests-2 Reworked flapping test - 00715_fetch_merged_or_mutated_part_zookeeper (cherry picked from commit d7681d0239edadfbd50973220ede08ec217d3d44) --- ..._fetch_merged_or_mutated_part_zookeeper.sh | 52 +++++++++++++++++++ ...fetch_merged_or_mutated_part_zookeeper.sql | 42 --------------- 2 files changed, 52 insertions(+), 42 deletions(-) create mode 100755 dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh delete mode 100644 dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sql diff --git a/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh b/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh new file mode 100755 index 00000000000..a7cb79908ae --- /dev/null +++ b/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh +. 
$CURDIR/mergetree_mutations.lib + + +${CLICKHOUSE_CLIENT} -n --query=" + DROP TABLE IF EXISTS fetches_r1; + DROP TABLE IF EXISTS fetches_r2" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE fetches_r1(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/fetches', 'r1') ORDER BY x" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE fetches_r2(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/fetches', 'r2') ORDER BY x \ + SETTINGS prefer_fetch_merged_part_time_threshold=0, \ + prefer_fetch_merged_part_size_threshold=0" + +${CLICKHOUSE_CLIENT} -n --query=" + INSERT INTO fetches_r1 VALUES (1); + INSERT INTO fetches_r1 VALUES (2); + INSERT INTO fetches_r1 VALUES (3)" + +${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r2" +${CLICKHOUSE_CLIENT} --query="DETACH TABLE fetches_r2" + +${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE fetches_r1 PARTITION tuple() FINAL" --replication_alter_partitions_sync=0 +${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r1" + +# After attach replica r2 should fetch the merged part from r1. +${CLICKHOUSE_CLIENT} --query="ATTACH TABLE fetches_r2" +${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r2" + +${CLICKHOUSE_CLIENT} --query="SELECT '*** Check data after fetch of merged part ***'" +${CLICKHOUSE_CLIENT} --query="SELECT _part, * FROM fetches_r2 ORDER BY x" + +${CLICKHOUSE_CLIENT} --query="DETACH TABLE fetches_r2" + +# Add mutation that doesn't change data. +${CLICKHOUSE_CLIENT} --query="ALTER TABLE fetches_r1 DELETE WHERE x = 0" --replication_alter_partitions_sync=0 + +wait_for_mutation "fetches_r1" "0000000000" +${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r1" + +# After attach replica r2 should compare checksums for mutated part and clone the local part. 
+${CLICKHOUSE_CLIENT} --query="ATTACH TABLE fetches_r2" +${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r2" + +${CLICKHOUSE_CLIENT} --query="SELECT '*** Check data after fetch/clone of mutated part ***'" +${CLICKHOUSE_CLIENT} --query="SELECT _part, * FROM fetches_r2 ORDER BY x" + +${CLICKHOUSE_CLIENT} -n --query=" + DROP TABLE fetches_r1; + DROP TABLE fetches_r2" diff --git a/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sql b/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sql deleted file mode 100644 index 9a1c1b77cae..00000000000 --- a/dbms/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS fetches_r1; -DROP TABLE IF EXISTS fetches_r2; - -CREATE TABLE fetches_r1(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/fetches', 'r1') ORDER BY x; -CREATE TABLE fetches_r2(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/fetches', 'r2') ORDER BY x - SETTINGS prefer_fetch_merged_part_time_threshold=0, - prefer_fetch_merged_part_size_threshold=0; - -INSERT INTO fetches_r1 VALUES (1); -INSERT INTO fetches_r1 VALUES (2); -INSERT INTO fetches_r1 VALUES (3); - -SYSTEM SYNC REPLICA fetches_r2; - -DETACH TABLE fetches_r2; - -SET replication_alter_partitions_sync=0; -OPTIMIZE TABLE fetches_r1 PARTITION tuple() FINAL; -SYSTEM SYNC REPLICA fetches_r1; - --- After attach replica r2 should fetch the merged part from r1. -ATTACH TABLE fetches_r2; -SYSTEM SYNC REPLICA fetches_r2; - -SELECT '*** Check data after fetch of merged part ***'; -SELECT _part, * FROM fetches_r2 ORDER BY x; - -DETACH TABLE fetches_r2; - --- Add mutation that doesn't change data. -ALTER TABLE fetches_r1 DELETE WHERE x = 0; -SYSTEM SYNC REPLICA fetches_r1; - --- After attach replica r2 should compare checksums for mutated part and clone the local part. 
-ATTACH TABLE fetches_r2; -SYSTEM SYNC REPLICA fetches_r2; - -SELECT '*** Check data after fetch/clone of mutated part ***'; -SELECT _part, * FROM fetches_r2 ORDER BY x; - -DROP TABLE fetches_r1; -DROP TABLE fetches_r2; From 1fd0ec6578c336cbc598be40f489b4e781afff12 Mon Sep 17 00:00:00 2001 From: akuzm <36882414+akuzm@users.noreply.github.com> Date: Tue, 17 Sep 2019 14:57:38 +0300 Subject: [PATCH 019/215] Merge pull request #6937 from amosbird/ubmemfix In hash tables, properly initialize zero key cell. (cherry picked from commit 75dc7d331e3c4db4ea6490fadb996bc02e175bd9) --- dbms/src/Common/HashTable/HashTable.h | 14 ++++++++++++-- .../01011_group_uniq_array_memsan.reference | 1 + .../0_stateless/01011_group_uniq_array_memsan.sql | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.reference create mode 100644 dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.sql diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index d29459a90d5..5c389af9828 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -224,8 +224,18 @@ private: public: bool hasZero() const { return has_zero; } - void setHasZero() { has_zero = true; } - void clearHasZero() { has_zero = false; } + + void setHasZero() + { + has_zero = true; + new (zeroValue()) Cell(); + } + + void clearHasZero() + { + has_zero = false; + zeroValue()->~Cell(); + } Cell * zeroValue() { return reinterpret_cast(&zero_value_storage); } const Cell * zeroValue() const { return reinterpret_cast(&zero_value_storage); } diff --git a/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.reference b/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.reference new file mode 100644 index 00000000000..b7c55c59479 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.reference @@ -0,0 +1 @@ +[[],[2]] diff --git 
a/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.sql b/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.sql new file mode 100644 index 00000000000..b8c16e48c42 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01011_group_uniq_array_memsan.sql @@ -0,0 +1 @@ +select groupUniqArray(v) from values('id int, v Array(int)', (1, [2]), (1, [])) group by id; From 1712ef0d5d79214e44afcaf1aa7a3fcdcec222c0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 15 Sep 2019 09:46:10 +0300 Subject: [PATCH 020/215] Merge pull request #6934 from abyss7/CLICKHOUSE-4643 Name temporary external table with existing subquery alias (cherry picked from commit 831eebe4dc1721dea4dd1ba034c34727774d36e1) --- .../Interpreters/GlobalSubqueriesVisitor.h | 12 ++++++++---- .../01009_global_array_join_names.reference | 0 .../01009_global_array_join_names.sql | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01009_global_array_join_names.reference create mode 100644 dbms/tests/queries/0_stateless/01009_global_array_join_names.sql diff --git a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h index 926e6afd1c2..6c380162af4 100644 --- a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h @@ -78,12 +78,16 @@ public: return; } - /// Generate the name for the external table. - String external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) + String external_table_name = subquery_or_table_name->tryGetAlias(); + if (external_table_name.empty()) { - ++external_table_id; + /// Generate the name for the external table. 
external_table_name = "_data" + toString(external_table_id); + while (external_tables.count(external_table_name)) + { + ++external_table_id; + external_table_name = "_data" + toString(external_table_id); + } } auto interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth, {}); diff --git a/dbms/tests/queries/0_stateless/01009_global_array_join_names.reference b/dbms/tests/queries/0_stateless/01009_global_array_join_names.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01009_global_array_join_names.sql b/dbms/tests/queries/0_stateless/01009_global_array_join_names.sql new file mode 100644 index 00000000000..f3cbd0cf44d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01009_global_array_join_names.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1 (a UInt8, b Array(DateTime)) ENGINE Memory; +CREATE TABLE test2 as test1 ENGINE Distributed(test_shard_localhost, currentDatabase(), test1); + +INSERT INTO test1 VALUES (1, [1, 2, 3]); + +SELECT 1 +FROM test2 AS test2 +ARRAY JOIN arrayFilter(t -> (t GLOBAL IN + ( + SELECT DISTINCT now() AS `ym:a` + WHERE 1 + )), test2.b) AS test2_b +WHERE 1; + +DROP TABLE test1; +DROP TABLE test2; From 1989b50166760e2aba201c1aa50f7aebce780cb6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 13 Sep 2019 08:57:30 +0300 Subject: [PATCH 021/215] Merge pull request #6928 from proller/fix27 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLICKHOUSE-4652 Another fix for АrrayEnumerateUniqRanked with empty arrays (cherry picked from commit ba40858094e06358d5d76f008059940ef9808823) --- dbms/src/Functions/array/arrayEnumerateRanked.h | 5 +---- .../queries/0_stateless/00909_arrayEnumerateUniq.reference | 3 +++ dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/dbms/src/Functions/array/arrayEnumerateRanked.h b/dbms/src/Functions/array/arrayEnumerateRanked.h index a1019ba83bf..de7656b70da 100644 --- a/dbms/src/Functions/array/arrayEnumerateRanked.h +++ b/dbms/src/Functions/array/arrayEnumerateRanked.h @@ -336,10 +336,6 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( /// Skipping offsets if no data in this array if (prev_off == off) { - - if (depth_to_look > 2) - want_clear = true; - if (depth_to_look >= 2) { /// Advance to the next element of the parent array. @@ -357,6 +353,7 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( { last_offset_by_depth[depth] = (*offsets_by_depth[depth])[current_offset_n_by_depth[depth]]; ++current_offset_n_by_depth[depth]; + want_clear = true; } else { diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference index 595dcdf3803..6f9a0bb9ea3 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference @@ -284,3 +284,6 @@ a1,a2 12 [1,2] [[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8],[],[9]] [[],[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8]] [[1],[2],[],[3]] +-- empty corner +[[],[1],[]] [[],[1],[]] +[[1]] [[1]] diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql index 9cf82a368d6..33097c99272 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql @@ -313,3 +313,6 @@ SELECT arrayEnumerateUniqRanked([[1], [], [1]]); SELECT arrayEnumerateUniqRanked([[1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1]]); SELECT arrayEnumerateUniqRanked([[], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1]]); SELECT arrayEnumerateUniqRanked([[1], [1], [], [1]]); + +select '-- empty 
corner'; +SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[],[1],[]] AS a UNION ALL SELECT [[1]] AS a ) ORDER BY a ASC ); From 497dffbf8bccdc5f0aceb7562e8f325c2352b2b2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 19 Sep 2019 22:21:38 +0300 Subject: [PATCH 022/215] Auto version update to [19.14.6.12] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 565f2157390..787f84ca33f 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 6) -set(VERSION_GITHASH c6464f2813c7d5cb939af11759d05705ba0f15ee) -set(VERSION_DESCRIBE v19.14.6.1-prestable) -set(VERSION_STRING 19.14.6.1) +set(VERSION_GITHASH 1989b50166760e2aba201c1aa50f7aebce780cb6) +set(VERSION_DESCRIBE v19.14.6.12-stable) +set(VERSION_STRING 19.14.6.12) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 7e157f15180268be33bd0334e78a4d36e69fb82b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 19 Sep 2019 22:21:55 +0300 Subject: [PATCH 023/215] Auto version update to [19.14.7.1] [54425] --- dbms/cmake/version.cmake | 6 +++--- .../StorageSystemContributors.generated.cpp | 18 ++++++++++++++++++ debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 6 files changed, 26 insertions(+), 8 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 787f84ca33f..06ee301be79 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) -set(VERSION_PATCH 6) +set(VERSION_PATCH 7) set(VERSION_GITHASH 1989b50166760e2aba201c1aa50f7aebce780cb6) -set(VERSION_DESCRIBE v19.14.6.12-stable) -set(VERSION_STRING 19.14.6.12) +set(VERSION_DESCRIBE v19.14.7.1-stable) 
+set(VERSION_STRING 19.14.7.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index debd1fe2dc6..3b6ccfc4db2 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -6,6 +6,7 @@ const char * auto_contributors[] { "Aleksandra (Ася)", "Alex Bocharov", "Alex Krash", + "Alex Ryndin", "Alex Zatelepin", "Alexander Avdonkin", "Alexander Ermolaev", @@ -21,6 +22,7 @@ const char * auto_contributors[] { "Alexander Millin", "Alexander Mochalin", "Alexander Prudaev", + "Alexander Rodin", "Alexander Sapin", "Alexander Tokmakov", "Alexander Tretiakov", @@ -69,6 +71,7 @@ const char * auto_contributors[] { "Bakhtiyor Ruziev", "BanyRule", "BayoNet", + "Big Elephant", "BlahGeek", "Bogdan", "Bogdan Voronin", @@ -96,6 +99,7 @@ const char * auto_contributors[] { "Dmitry Petukhov", "Dmitry Rubashkin", "Dmitry S..ky / skype: dvska-at-skype", + "Doge", "Elghazal Ahmed", "Emmanuel Donin de Rosière", "Eric", @@ -108,6 +112,7 @@ const char * auto_contributors[] { "Fadi Hadzh", "FeehanG", "Flowyi", + "Francisco Barón", "Fruit of Eden", "Gary Dotzler", "George", @@ -235,6 +240,8 @@ const char * auto_contributors[] { "Pawel Rog", "Persiyanov Dmitriy Andreevich", "Quid37", + "Rafael David Tinoco", + "Ramazan Polat", "Ravengg", "Reto Kromer", "Roman Lipovsky", @@ -271,6 +278,7 @@ const char * auto_contributors[] { "The-Alchemist", "Tobias Adamson", "Tsarkova Anastasia", + "VDimir", "Vadim", "Vadim Plakhtinskiy", "Vadim Skipin", @@ -284,6 +292,7 @@ const char * auto_contributors[] { "Victor Tarnavsky", "Vitaliy Karnienko", "Vitaliy Lyudvichenko", + "Vitaliy Zakaznikov", "Vitaly Baranov", "Vitaly Samigullin", "Vivien Maisonneuve", @@ -296,6 +305,7 @@ const char * auto_contributors[] { "Vladislav Smirnov", "Vojtech Splichal", "Vsevolod Orlov", + 
"Vxider", "Vyacheslav Alipov", "Weiqing Xu", "William Shallum", @@ -312,9 +322,11 @@ const char * auto_contributors[] { "abdrakhmanov", "abyss7", "achulkov2", + "akazz", "akonyaev", "akuzm", "alesapin", + "alex-zaitsev", "alexander kozhikhov", "alexey-milovidov", "andrewsg", @@ -339,6 +351,8 @@ const char * auto_contributors[] { "davydovska", "decaseal", "dimarub2000", + "dmitrii", + "dmitriiut", "dmitry kuzmin", "eejoin", "egatov", @@ -363,6 +377,7 @@ const char * auto_contributors[] { "javi", "javi santana", "kmeaw", + "kreuzerkrieg", "ks1322", "kshvakov", "leozhang", @@ -389,6 +404,7 @@ const char * auto_contributors[] { "olegkv", "orantius", "peshkurov", + "philip.han", "proller", "pyos", "qianlixiang", @@ -399,6 +415,7 @@ const char * auto_contributors[] { "santaux", "sdk2", "serebrserg", + "sev7e0", "shangshujie", "shedx", "simon-says", @@ -408,6 +425,7 @@ const char * auto_contributors[] { "sundyli", "svladykin", "tai", + "tavplubix", "topvisor", "unknown", "urgordeadbeef", diff --git a/debian/changelog b/debian/changelog index 4d6d8be0b76..ac8f5586b00 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.14.6.1) unstable; urgency=low +clickhouse (19.14.7.1) unstable; urgency=low * Modified source code - -- clickhouse-release Wed, 18 Sep 2019 17:53:49 +0300 + -- clickhouse-release Thu, 19 Sep 2019 22:21:52 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 771e6405414..a3ea01d3a17 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.6.* +ARG version=19.14.7.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 0337faa86ff..dfc0e98a7ca 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb 
http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.6.* +ARG version=19.14.7.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 62cc9c0b958..cc4f7b66320 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.14.6.* +ARG version=19.14.7.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 2c930c6d803d1ffa99b3b5704e88a2471599656e Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 10 Sep 2019 00:40:40 +0300 Subject: [PATCH 024/215] Build fixes (Orc, ...) (#6835) * Fix build * cmake: fix cpuinfo * Fix includes after processors merge Conflicts: dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp dbms/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp dbms/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp * Fix build in gcc8 * fix test link * fix test link * Fix test link * link fix * Fix includes after processors merge 2 Conflicts: dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp * Fix includes after processors merge 3 * link fix * Fix likely/unlikely conflict with cython * Fix conflict with protobuf/stubs/atomicops.h * remove unlikely.h * Fix macos build (do not use timer_t) * wip * Fix build (orc, ...) 
* Missing files * Try fix * fix hdfs * Fix llvm 7.1 find --- CMakeLists.txt | 2 +- cmake/find_hdfs3.cmake | 35 +-- cmake/find_llvm.cmake | 22 +- cmake/find_orc.cmake | 40 ++- cmake/find_parquet.cmake | 1 + contrib/CMakeLists.txt | 17 +- contrib/arrow-cmake/CMakeLists.txt | 24 +- contrib/arrow-cmake/orc_check.cmake | 126 ++++++++++ contrib/libhdfs3-cmake/CMakeLists.txt | 10 +- contrib/orc-cmake/CMakeLists.txt | 229 ++++++++++++++++++ .../Formats/Impl/ArrowColumnToCHColumn.cpp | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.h | 2 +- 12 files changed, 443 insertions(+), 67 deletions(-) create mode 100644 contrib/arrow-cmake/orc_check.cmake create mode 100644 contrib/orc-cmake/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 5330c8daeb5..578e25b8e16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -343,7 +343,7 @@ include (cmake/find_hyperscan.cmake) include (cmake/find_simdjson.cmake) include (cmake/find_rapidjson.cmake) include (cmake/find_fastops.cmake) -include (cmake/find_orc.cmake) +#include (cmake/find_orc.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/cmake/find_hdfs3.cmake b/cmake/find_hdfs3.cmake index 4c29047fc75..9c593d3266a 100644 --- a/cmake/find_hdfs3.cmake +++ b/cmake/find_hdfs3.cmake @@ -1,24 +1,29 @@ -if (NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF) - option (ENABLE_HDFS "Enable HDFS" ${NOT_UNBUNDLED}) -endif () +if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF) + option(ENABLE_HDFS "Enable HDFS" 1) +endif() -if (ENABLE_HDFS AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") - message (WARNING "submodule contrib/libhdfs3 is missing. 
to fix try run: \n git submodule update --init --recursive") - set (ENABLE_HDFS 0) -endif () +if(ENABLE_HDFS) +option(USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ${NOT_UNBUNDLED}) -if (ENABLE_HDFS) -option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON) +if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") + if(USE_INTERNAL_HDFS3_LIBRARY) + message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init --recursive") + endif() + set(MISSING_INTERNAL_HDFS3_LIBRARY 1) + set(USE_INTERNAL_HDFS3_LIBRARY 0) +endif() -if (NOT USE_INTERNAL_HDFS3_LIBRARY) - find_package(hdfs3) -endif () +if(NOT USE_INTERNAL_HDFS3_LIBRARY) + find_library(HDFS3_LIBRARY hdfs3) + find_path(HDFS3_INCLUDE_DIR NAMES hdfs/hdfs.h PATHS ${HDFS3_INCLUDE_PATHS}) +endif() -if (HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR) +if(HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR) set(USE_HDFS 1) -elseif (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY) +elseif(NOT MISSING_INTERNAL_HDFS3_LIBRARY AND LIBGSASL_LIBRARY AND LIBXML2_LIBRARY) set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include") set(HDFS3_LIBRARY hdfs3) + set(USE_INTERNAL_HDFS3_LIBRARY 1) set(USE_HDFS 1) else() set(USE_INTERNAL_HDFS3_LIBRARY 0) @@ -26,4 +31,4 @@ endif() endif() -message (STATUS "Using hdfs3=${USE_HDFS}: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}") +message(STATUS "Using hdfs3=${USE_HDFS}: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}") diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index 3692a98b979..c668416c0c0 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -18,22 +18,12 @@ if (ENABLE_EMBEDDED_COMPILER) elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") find_package(LLVM ${CMAKE_CXX_COMPILER_VERSION} CONFIG PATHS ${LLVM_PATHS}) else () - #TODO: - #if(NOT LLVM_FOUND) - # find_package(LLVM 9 CONFIG PATHS ${LLVM_PATHS}) - #endif() - #if(NOT LLVM_FOUND) - # find_package(LLVM 8 CONFIG PATHS 
${LLVM_PATHS}) - #endif() - if (NOT LLVM_FOUND) - find_package (LLVM 7 CONFIG PATHS ${LLVM_PATHS}) - endif () - if (NOT LLVM_FOUND) - find_package (LLVM 6 CONFIG PATHS ${LLVM_PATHS}) - endif () - if (NOT LLVM_FOUND) - find_package (LLVM 5 CONFIG PATHS ${LLVM_PATHS}) - endif () + # TODO: 9 8 + foreach(llvm_v 7.1 7 6 5) + if (NOT LLVM_FOUND) + find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS}) + endif () + endforeach () endif () if (LLVM_FOUND) diff --git a/cmake/find_orc.cmake b/cmake/find_orc.cmake index 3676bec1b6b..50e563b04b4 100644 --- a/cmake/find_orc.cmake +++ b/cmake/find_orc.cmake @@ -1,8 +1,38 @@ -##TODO replace hardcode to find procedure +option (ENABLE_ORC "Enable ORC" 1) -set(USE_ORC 0) -set(USE_INTERNAL_ORC_LIBRARY ON) +if(ENABLE_ORC) +option (USE_INTERNAL_ORC_LIBRARY "Set to FALSE to use system ORC instead of bundled" ${NOT_UNBUNDLED}) -if (ARROW_LIBRARY) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh") + if(USE_INTERNAL_ORC_LIBRARY) + message(WARNING "submodule contrib/orc is missing. 
to fix try run: \n git submodule update --init --recursive") + set(USE_INTERNAL_ORC_LIBRARY 0) + endif() + set(MISSING_INTERNAL_ORC_LIBRARY 1) +endif () + +if (NOT USE_INTERNAL_ORC_LIBRARY) + find_package(orc) +endif () + +#if (USE_INTERNAL_ORC_LIBRARY) +#find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h) +#find_library(CYRUS_SASL_SHARED_LIB sasl2) +#if (NOT CYRUS_SASL_INCLUDE_DIR OR NOT CYRUS_SASL_SHARED_LIB) +# set(USE_ORC 0) +#endif() +#endif() + +if (ORC_LIBRARY AND ORC_INCLUDE_DIR) set(USE_ORC 1) -endif() \ No newline at end of file +elseif(NOT MISSING_INTERNAL_ORC_LIBRARY AND ARROW_LIBRARY) # (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY) + set(ORC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include") + set(ORC_LIBRARY orc) + set(USE_ORC 1) +else() + set(USE_INTERNAL_ORC_LIBRARY 0) +endif() + +endif() + +message (STATUS "Using internal=${USE_INTERNAL_ORC_LIBRARY} orc=${USE_ORC}: ${ORC_INCLUDE_DIR} : ${ORC_LIBRARY}") diff --git a/cmake/find_parquet.cmake b/cmake/find_parquet.cmake index 63f589a9ea5..5c5bc664113 100644 --- a/cmake/find_parquet.cmake +++ b/cmake/find_parquet.cmake @@ -62,6 +62,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) endif() set(USE_PARQUET 1) + set(USE_ORC 1) endif() endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 96462de0190..54fdc4d69e0 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -10,19 +10,6 @@ endif () set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) -if (USE_INTERNAL_ORC_LIBRARY) - set(BUILD_JAVA OFF) - set (ANALYZE_JAVA OFF) - set (BUILD_CPP_TESTS OFF) - set (BUILD_TOOLS OFF) - option(BUILD_JAVA OFF) - option (ANALYZE_JAVA OFF) - option (BUILD_CPP_TESTS OFF) - option (BUILD_TOOLS OFF) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/contrib/orc/cmake_modules") - add_subdirectory(orc) -endif() - if (USE_INTERNAL_BOOST_LIBRARY) add_subdirectory (boost-cmake) endif () @@ -327,3 +314,7 @@ endif() if (USE_FASTOPS) add_subdirectory (fastops-cmake) 
endif() + +#if (USE_INTERNAL_ORC_LIBRARY) +# add_subdirectory(orc-cmake) +#endif () diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index ba1ddc2414a..cfd57f2b296 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -56,11 +56,11 @@ set(ORC_SOURCE_WRAP_DIR ${ORC_SOURCE_DIR}/wrap) set(ORC_BUILD_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/src) set(ORC_BUILD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/include) -set(GOOGLE_PROTOBUF_DIR ${ClickHouse_SOURCE_DIR}/contrib/protobuf/src/) +set(GOOGLE_PROTOBUF_DIR ${Protobuf_INCLUDE_DIR}/) set(ORC_ADDITION_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(ARROW_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src) -set(PROTOBUF_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/../protobuf/cmake/protoc) +set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) set(PROTO_DIR ${ORC_SOURCE_DIR}/../proto) @@ -70,14 +70,10 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" "${PROTO_DIR}/orc_proto.proto") -include_directories(SYSTEM ${ORC_INCLUDE_DIR}) -include_directories(SYSTEM ${ORC_SOURCE_SRC_DIR}) -include_directories(SYSTEM ${ORC_SOURCE_WRAP_DIR}) -include_directories(SYSTEM ${GOOGLE_PROTOBUF_DIR}) -include_directories(SYSTEM ${ORC_BUILD_SRC_DIR}) -include_directories(SYSTEM ${ORC_BUILD_INCLUDE_DIR}) -include_directories(SYSTEM ${ORC_ADDITION_SOURCE_DIR}) -include_directories(SYSTEM ${ARROW_SRC_DIR}) +include(${ClickHouse_SOURCE_DIR}/contrib/orc/cmake_modules/CheckSourceCompiles.cmake) +include(orc_check.cmake) +configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh") +configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh") set(ORC_SRCS @@ -232,6 +228,14 @@ if (ARROW_WITH_ZSTD) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY}) endif() +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_INCLUDE_DIR}) 
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_SOURCE_SRC_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_SOURCE_WRAP_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${GOOGLE_PROTOBUF_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_SRC_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR}) # === parquet diff --git a/contrib/arrow-cmake/orc_check.cmake b/contrib/arrow-cmake/orc_check.cmake new file mode 100644 index 00000000000..ec1e53cc649 --- /dev/null +++ b/contrib/arrow-cmake/orc_check.cmake @@ -0,0 +1,126 @@ +# Not changed part of contrib/orc/c++/src/CMakeLists.txt + +INCLUDE(CheckCXXSourceCompiles) + +CHECK_CXX_SOURCE_COMPILES(" + #include + #include + int main(int,char*[]){ + int f = open(\"/x/y\", O_RDONLY); + char buf[100]; + return pread(f, buf, 100, 1000) == 0; + }" + HAS_PREAD +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int,char*[]){ + struct tm time2020; + return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020); + }" + HAS_STRPTIME +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int,char* argv[]){ + return static_cast(std::stoll(argv[0])); + }" + HAS_STOLL +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + #include + int main(int,char*[]){ + int64_t x = 1; printf(\"%lld\",x); + }" + INT64_IS_LL +) + +CHECK_CXX_SOURCE_COMPILES(" + #ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored \"-Wdeprecated\" + #pragma clang diagnostic pop + #elif defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored \"-Wdeprecated\" + #pragma GCC diagnostic pop + #elif defined(_MSC_VER) + #pragma warning( push ) + #pragma warning( disable : 4996 ) + #pragma warning( pop ) + #else + unknownCompiler! 
+ #endif + int main(int, char *[]) {}" + HAS_DIAGNOSTIC_PUSH +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int, char *[]) { + return std::isnan(1.0f); + }" + HAS_STD_ISNAN +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int, char *[]) { + std::mutex test_mutex; + std::lock_guard lock_mutex(test_mutex); + }" + HAS_STD_MUTEX +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + std::string func() { + std::string var = \"test\"; + return std::move(var); + } + int main(int, char *[]) {}" + NEEDS_REDUNDANT_MOVE +) + +INCLUDE(CheckCXXSourceRuns) + +CHECK_CXX_SOURCE_RUNS(" + #include + int main(int, char *[]) { + time_t t = -14210715; // 1969-07-20 12:34:45 + struct tm *ptm = gmtime(&t); + return !(ptm && ptm->tm_year == 69); + }" + HAS_PRE_1970 +) + +CHECK_CXX_SOURCE_RUNS(" + #include + #include + int main(int, char *[]) { + setenv(\"TZ\", \"America/Los_Angeles\", 1); + tzset(); + struct tm time2037; + struct tm time2038; + strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037); + strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038); + return mktime(&time2038) - mktime(&time2037) != 31536000; + }" + HAS_POST_2038 +) + +set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR}) +set(CMAKE_REQUIRED_LIBRARIES zlib) +CHECK_CXX_SOURCE_COMPILES(" + #define Z_PREFIX + #include + z_stream strm; + int main(int, char *[]) { + deflateReset(&strm); + }" + NEEDS_Z_PREFIX +) diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index 8ec14f897b9..e1ba7225b0f 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -199,17 +199,17 @@ if (WITH_KERBEROS) endif() target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR}) -target_link_libraries(hdfs3 ${LIBGSASL_LIBRARY}) +target_link_libraries(hdfs3 PRIVATE ${LIBGSASL_LIBRARY}) if (WITH_KERBEROS) - target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES}) + target_link_libraries(hdfs3 PRIVATE ${KERBEROS_LIBRARIES}) endif() 
-target_link_libraries(hdfs3 ${LIBXML2_LIBRARY}) +target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY}) # inherit from parent cmake target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR}) -target_link_libraries(hdfs3 ${Protobuf_LIBRARY}) +target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY}) if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES) target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR}) - target_link_libraries(hdfs3 ${OPENSSL_LIBRARIES}) + target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES}) endif() diff --git a/contrib/orc-cmake/CMakeLists.txt b/contrib/orc-cmake/CMakeLists.txt new file mode 100644 index 00000000000..066ba00aede --- /dev/null +++ b/contrib/orc-cmake/CMakeLists.txt @@ -0,0 +1,229 @@ +# modifyed copy of contrib/orc/c++/src/CMakeLists.txt +set(LIBRARY_INCLUDE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include) +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/src) + +set(PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIR}) +set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}") + +INCLUDE(CheckCXXSourceCompiles) + +CHECK_CXX_SOURCE_COMPILES(" + #include + #include + int main(int,char*[]){ + int f = open(\"/x/y\", O_RDONLY); + char buf[100]; + return pread(f, buf, 100, 1000) == 0; + }" + HAS_PREAD +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int,char*[]){ + struct tm time2020; + return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020); + }" + HAS_STRPTIME +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int,char* argv[]){ + return static_cast(std::stoll(argv[0])); + }" + HAS_STOLL +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + #include + int main(int,char*[]){ + int64_t x = 1; printf(\"%lld\",x); + }" + INT64_IS_LL +) + +CHECK_CXX_SOURCE_COMPILES(" + #ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored \"-Wdeprecated\" + #pragma clang diagnostic pop + #elif defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored \"-Wdeprecated\" + #pragma GCC diagnostic pop + #elif defined(_MSC_VER) + #pragma warning( push ) + #pragma warning( disable : 4996 ) + #pragma warning( pop ) + #else + unknownCompiler! 
+ #endif + int main(int, char *[]) {}" + HAS_DIAGNOSTIC_PUSH +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int, char *[]) { + return std::isnan(1.0f); + }" + HAS_STD_ISNAN +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + int main(int, char *[]) { + std::mutex test_mutex; + std::lock_guard lock_mutex(test_mutex); + }" + HAS_STD_MUTEX +) + +CHECK_CXX_SOURCE_COMPILES(" + #include + std::string func() { + std::string var = \"test\"; + return std::move(var); + } + int main(int, char *[]) {}" + NEEDS_REDUNDANT_MOVE +) + +INCLUDE(CheckCXXSourceRuns) + +CHECK_CXX_SOURCE_RUNS(" + #include + int main(int, char *[]) { + time_t t = -14210715; // 1969-07-20 12:34:45 + struct tm *ptm = gmtime(&t); + return !(ptm && ptm->tm_year == 69); + }" + HAS_PRE_1970 +) + +CHECK_CXX_SOURCE_RUNS(" + #include + #include + int main(int, char *[]) { + setenv(\"TZ\", \"America/Los_Angeles\", 1); + tzset(); + struct tm time2037; + struct tm time2038; + strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037); + strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038); + return mktime(&time2038) - mktime(&time2037) != 31536000; + }" + HAS_POST_2038 +) + +set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR}) +set(CMAKE_REQUIRED_LIBRARIES zlib) +CHECK_CXX_SOURCE_COMPILES(" + #define Z_PREFIX + #include + z_stream strm; + int main(int, char *[]) { + deflateReset(&strm); + }" + NEEDS_Z_PREFIX +) + +configure_file ( + "${LIBRARY_DIR}/Adaptor.hh.in" + "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh" + ) + + +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.cc + COMMAND ${PROTOBUF_EXECUTABLE} + -I${ClickHouse_SOURCE_DIR}/contrib/orc/proto + --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" + "${ClickHouse_SOURCE_DIR}/contrib/orc/proto/orc_proto.proto" +) + +set(SOURCE_FILES + "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh" + ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h + ${LIBRARY_DIR}/io/InputStream.cc + ${LIBRARY_DIR}/io/OutputStream.cc + 
${LIBRARY_DIR}/wrap/orc-proto-wrapper.cc + ${LIBRARY_DIR}/Adaptor.cc + ${LIBRARY_DIR}/ByteRLE.cc + ${LIBRARY_DIR}/ColumnPrinter.cc + ${LIBRARY_DIR}/ColumnReader.cc + ${LIBRARY_DIR}/ColumnWriter.cc + ${LIBRARY_DIR}/Common.cc + ${LIBRARY_DIR}/Compression.cc + ${LIBRARY_DIR}/Exceptions.cc + ${LIBRARY_DIR}/Int128.cc + ${LIBRARY_DIR}/LzoDecompressor.cc + ${LIBRARY_DIR}/MemoryPool.cc + ${LIBRARY_DIR}/OrcFile.cc + ${LIBRARY_DIR}/Reader.cc + ${LIBRARY_DIR}/RLEv1.cc + ${LIBRARY_DIR}/RLEv2.cc + ${LIBRARY_DIR}/RLE.cc + ${LIBRARY_DIR}/Statistics.cc + ${LIBRARY_DIR}/StripeStream.cc + ${LIBRARY_DIR}/Timezone.cc + ${LIBRARY_DIR}/TypeImpl.cc + ${LIBRARY_DIR}/Vector.cc + ${LIBRARY_DIR}/Writer.cc + ) + +if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP) + set(SOURCE_FILES ${SOURCE_FILES} ${LIBRARY_DIR}/OrcHdfsFile.cc) +endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP) + +#list(TRANSFORM SOURCE_FILES PREPEND ${LIBRARY_DIR}/) + +configure_file ( + "${LIBRARY_INCLUDE}/orc/orc-config.hh.in" + "${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh" + ) + +add_library (orc ${SOURCE_FILES}) + +target_include_directories (orc + PRIVATE + ${LIBRARY_INCLUDE} + ${LIBRARY_DIR} + #PUBLIC + ${CMAKE_CURRENT_BINARY_DIR} + PRIVATE + ${PROTOBUF_INCLUDE_DIR} + ${ZLIB_INCLUDE_DIR} + ${SNAPPY_INCLUDE_DIR} + ${LZ4_INCLUDE_DIR} + ${LIBHDFSPP_INCLUDE_DIR} + ) + +target_link_libraries (orc PRIVATE + ${Protobuf_LIBRARY} + ${ZLIB_LIBRARIES} + ${SNAPPY_LIBRARY} + ${LZ4_LIBRARY} + ${LIBHDFSPP_LIBRARIES} + ) + +#install(TARGETS orc DESTINATION lib) + +if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP) + add_definitions(-DBUILD_LIBHDFSPP) +endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP) diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 0cd5ffb03e0..edb8d5c15f4 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1,7 +1,7 @@ #include 
"config_formats.h" #include "ArrowColumnToCHColumn.h" -#if USE_ORC or USE_PARQUET +#if USE_ORC || USE_PARQUET #include #include #include diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index b5f4732d107..34b58a80091 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -1,6 +1,6 @@ #include "config_formats.h" -#if USE_ORC or USE_PARQUET +#if USE_ORC || USE_PARQUET #include #include From 9690fbec093e84113214c39f18ac6ffe14c73983 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 1 Oct 2019 11:44:52 +0300 Subject: [PATCH 025/215] Empty commit From 599e63425abe31c42cdf57b176f881db6f1f9e49 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Sep 2019 20:32:51 +0300 Subject: [PATCH 026/215] Merge pull request #6990 from Akazz/timeout_for_sync_replica_cmd Fixed timeout mechanism for SYNC REPLICA command + simplified related code (cherry picked from commit 123b8cb43ccb3342805db79764dc03766289d904) --- .../Interpreters/InterpreterSystemQuery.cpp | 13 +++++++- .../Storages/StorageReplicatedMergeTree.cpp | 27 ++++++----------- ...3_sync_replica_timeout_zookeeper.reference | 1 + .../01013_sync_replica_timeout_zookeeper.sh | 30 +++++++++++++++++++ 4 files changed, 52 insertions(+), 19 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.reference create mode 100755 dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 6e434189c66..b6ed4dcb2e6 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -38,6 +38,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_KILL; extern const int NOT_IMPLEMENTED; + extern const int TIMEOUT_EXCEEDED; } 
@@ -331,7 +332,17 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query) StoragePtr table = context.getTable(database_name, table_name); if (auto storage_replicated = dynamic_cast(table.get())) - storage_replicated->waitForShrinkingQueueSize(0, context.getSettingsRef().receive_timeout.value.milliseconds()); + { + LOG_TRACE(log, "Synchronizing entries in replica's queue with table's log and waiting for it to become empty"); + if (!storage_replicated->waitForShrinkingQueueSize(0, context.getSettingsRef().receive_timeout.totalMilliseconds())) + { + LOG_ERROR(log, "SYNC REPLICA " + database_name + "." + table_name + ": Timed out!"); + throw Exception( + "SYNC REPLICA " + database_name + "." + table_name + ": command timed out! " + "See the 'receive_timeout' setting", ErrorCodes::TIMEOUT_EXCEEDED); + } + LOG_TRACE(log, "SYNC REPLICA " + database_name + "." + table_name + ": OK"); + } else throw Exception("Table " + database_name + "." + table_name + " is not replicated", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e5821c1bcaf..64e011df99c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -5126,38 +5126,29 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UInt64 max_wait_milliseconds) { + Stopwatch watch; + /// Let's fetch new log entries firstly queue.pullLogsToQueue(getZooKeeper()); - Stopwatch watch; - Poco::Event event; - std::atomic cond_reached{false}; - - auto callback = [&event, &cond_reached, queue_size] (size_t new_queue_size) + Poco::Event target_size_event; + auto callback = [&target_size_event, queue_size] (size_t new_queue_size) { if (new_queue_size <= queue_size) - cond_reached.store(true, std::memory_order_relaxed); - - event.set(); + target_size_event.set(); 
}; + const auto handler = queue.addSubscriber(std::move(callback)); - auto handler = queue.addSubscriber(std::move(callback)); - - while (true) + while (!target_size_event.tryWait(50)) { - event.tryWait(50); - if (max_wait_milliseconds && watch.elapsedMilliseconds() > max_wait_milliseconds) - break; - - if (cond_reached) - break; + return false; if (partial_shutdown_called) throw Exception("Shutdown is called for table", ErrorCodes::ABORTED); } - return cond_reached.load(std::memory_order_relaxed); + return true; } diff --git a/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.reference b/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.reference @@ -0,0 +1 @@ +OK diff --git a/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh b/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh new file mode 100755 index 00000000000..9e846b42591 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01013_sync_replica_timeout_zookeeper.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +R1=table_1013_1 +R2=table_1013_2 + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R1; + DROP TABLE IF EXISTS $R2; + + CREATE TABLE $R1 (x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1013', 'r1') ORDER BY x; + CREATE TABLE $R2 (x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1013', 'r2') ORDER BY x; + + SYSTEM STOP FETCHES $R2; + INSERT INTO $R1 VALUES (1) +" + +timeout 10s ${CLICKHOUSE_CLIENT} -n -q " + SET receive_timeout=1; + SYSTEM SYNC REPLICA $R2 +" 2>&1 | fgrep -q "DB::Exception: SYNC REPLICA ${CLICKHOUSE_DATABASE}.$R2: command timed out!" && echo 'OK' || echo 'Failed!' 
+ +# By dropping tables all related SYNC REPLICA queries would be terminated as well +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R2; + DROP TABLE IF EXISTS $R1; +" From d83de5e61f05da5848698768497dd551496e6f88 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Sep 2019 23:45:51 +0300 Subject: [PATCH 027/215] Merge pull request #7022 from 4ertus2/bugs Fix "Unknown identifier" in ORDER BY and GROUP BY with Multiple Joins (cherry picked from commit 2432a68009bc726f9efdfa94b85f348096b1ab6c) --- .../JoinToSubqueryTransformVisitor.cpp | 21 ++++++----- .../00847_multiple_join_same_column.sql | 28 +++++++++++---- .../00882_multiple_join_no_alias.reference | 8 +++++ .../00882_multiple_join_no_alias.sql | 35 +++++++++++++++++++ 4 files changed, 77 insertions(+), 15 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.reference create mode 100644 dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.sql diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index c6e72b4d252..b60e6533921 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -194,14 +194,14 @@ struct ColumnAliasesMatcher } }; - static bool needChildVisit(ASTPtr & node, const ASTPtr &) + static bool needChildVisit(const ASTPtr & node, const ASTPtr &) { if (node->as()) return false; return true; } - static void visit(ASTPtr & ast, Data & data) + static void visit(const ASTPtr & ast, Data & data) { if (auto * t = ast->as()) visit(*t, ast, data); @@ -210,8 +210,9 @@ struct ColumnAliasesMatcher throw Exception("Multiple JOIN do not support asterisks for complex queries yet", ErrorCodes::NOT_IMPLEMENTED); } - static void visit(ASTIdentifier & node, ASTPtr &, Data & data) + static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data) { + ASTIdentifier & node = 
const_cast(const_node); /// we know it's not const if (node.isShort()) return; @@ -375,7 +376,7 @@ using RewriteVisitor = InDepthNodeVisitor; using SetSubqueryAliasMatcher = OneTypeMatcher; using SetSubqueryAliasVisitor = InDepthNodeVisitor; using ExtractAsterisksVisitor = ExtractAsterisksMatcher::Visitor; -using ColumnAliasesVisitor = InDepthNodeVisitor; +using ColumnAliasesVisitor = ConstInDepthNodeVisitor; using AppendSemanticMatcher = OneTypeMatcher; using AppendSemanticVisitor = InDepthNodeVisitor; @@ -403,15 +404,19 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast if (select.select()) { aliases_data.public_names = true; - ColumnAliasesVisitor(aliases_data).visit(select.refSelect()); + ColumnAliasesVisitor(aliases_data).visit(select.select()); aliases_data.public_names = false; } if (select.where()) - ColumnAliasesVisitor(aliases_data).visit(select.refWhere()); + ColumnAliasesVisitor(aliases_data).visit(select.where()); if (select.prewhere()) - ColumnAliasesVisitor(aliases_data).visit(select.refPrewhere()); + ColumnAliasesVisitor(aliases_data).visit(select.prewhere()); + if (select.orderBy()) + ColumnAliasesVisitor(aliases_data).visit(select.orderBy()); + if (select.groupBy()) + ColumnAliasesVisitor(aliases_data).visit(select.groupBy()); if (select.having()) - ColumnAliasesVisitor(aliases_data).visit(select.refHaving()); + ColumnAliasesVisitor(aliases_data).visit(select.having()); /// JOIN sections for (auto & child : select.tables()->children) diff --git a/dbms/tests/queries/0_stateless/00847_multiple_join_same_column.sql b/dbms/tests/queries/0_stateless/00847_multiple_join_same_column.sql index d444655a6ce..44b3fe202d3 100644 --- a/dbms/tests/queries/0_stateless/00847_multiple_join_same_column.sql +++ b/dbms/tests/queries/0_stateless/00847_multiple_join_same_column.sql @@ -16,30 +16,44 @@ left join y on (y.a = s.a and y.b = s.b) format Vertical; select t.a, s.b, s.a, s.b, y.a, y.b from t left join s on (t.a = s.a and s.b = 
t.b) -left join y on (y.a = s.a and y.b = s.b) format PrettyCompactNoEscapes; +left join y on (y.a = s.a and y.b = s.b) +order by t.a +format PrettyCompactNoEscapes; select t.a as t_a from t -left join s on s.a = t_a format PrettyCompactNoEscapes; +left join s on s.a = t_a +order by t.a +format PrettyCompactNoEscapes; select t.a, s.a as s_a from t left join s on s.a = t.a -left join y on y.b = s.b format PrettyCompactNoEscapes; +left join y on y.b = s.b +order by t.a +format PrettyCompactNoEscapes; select t.a, t.a, t.b as t_b from t left join s on t.a = s.a -left join y on y.b = s.b format PrettyCompactNoEscapes; +left join y on y.b = s.b +order by t.a +format PrettyCompactNoEscapes; select s.a, s.a, s.b as s_b, s.b from t left join s on s.a = t.a -left join y on s.b = y.b format PrettyCompactNoEscapes; +left join y on s.b = y.b +order by t.a +format PrettyCompactNoEscapes; select y.a, y.a, y.b as y_b, y.b from t left join s on s.a = t.a -left join y on y.b = s.b format PrettyCompactNoEscapes; +left join y on y.b = s.b +order by t.a +format PrettyCompactNoEscapes; select t.a, t.a as t_a, s.a, s.a as s_a, y.a, y.a as y_a from t left join s on t.a = s.a -left join y on y.b = s.b format PrettyCompactNoEscapes; +left join y on y.b = s.b +order by t.a +format PrettyCompactNoEscapes; drop table t; drop table s; diff --git a/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.reference b/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.reference new file mode 100644 index 00000000000..a3723bc9976 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.reference @@ -0,0 +1,8 @@ +1 1 1 1 +0 0 0 0 +0 +1 +1 1 1 1 1 1 +2 2 0 0 0 0 +2 2 0 +1 1 1 diff --git a/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.sql b/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.sql new file mode 100644 index 00000000000..bd3a2a19913 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00882_multiple_join_no_alias.sql @@ -0,0 +1,35 @@ 
+drop table if exists t; +drop table if exists s; +drop table if exists y; + +create table t(a Int64, b Int64) engine = Memory; +create table s(a Int64, b Int64) engine = Memory; +create table y(a Int64, b Int64) engine = Memory; + +insert into t values (1,1), (2,2); +insert into s values (1,1); +insert into y values (1,1); + +select s.a, s.a, s.b as s_b, s.b from t +left join s on s.a = t.a +left join y on s.b = y.b +order by t.a; + +select max(s.a) from t +left join s on s.a = t.a +left join y on s.b = y.b +group by t.a; + +select t.a, t.a as t_a, s.a, s.a as s_a, y.a, y.a as y_a from t +left join s on t.a = s.a +left join y on y.b = s.b +order by t.a; + +select t.a, t.a as t_a, max(s.a) from t +left join s on t.a = s.a +left join y on y.b = s.b +group by t.a; + +drop table t; +drop table s; +drop table y; From 9533ff12ddd5d2fab161381d27ed3c964f4f1c9d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 22 Sep 2019 05:12:55 +0300 Subject: [PATCH 028/215] Merge pull request #7032 from azat-archive/distributed-directory-monitor-SIGSEGV Avoid SIGSEGV on batch send failure (file with index XX is absent) (cherry picked from commit 5970aafd9aa5f1c885365297dfdb3d10fec8fdb8) --- dbms/src/Storages/Distributed/DirectoryMonitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp index 13c9cf3050a..6d5bffe3491 100644 --- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp @@ -390,7 +390,8 @@ struct StorageDistributedDirectoryMonitor::Batch remote->writePrepared(in); } - remote->writeSuffix(); + if (remote) + remote->writeSuffix(); } catch (const Exception & e) { From af5099b869f9656389bd7bc10f8d6b77b692201d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 24 Sep 2019 02:11:15 +0300 Subject: [PATCH 029/215] Merge pull request #7062 from ClickHouse/fix-msan-for-low-cardinality Fix 
msan for LowCardinality (cherry picked from commit a314a36feb18b19217d7f544bd8ef15c2fcd1c53) --- dbms/src/Core/Block.cpp | 5 +++- dbms/src/Core/Block.h | 2 +- dbms/src/Functions/IFunction.cpp | 50 +++++++++++++++++++------------- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index b045b9ec1ff..c64cf387a3b 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -219,11 +219,14 @@ size_t Block::getPositionByName(const std::string & name) const } -void Block::checkNumberOfRows() const +void Block::checkNumberOfRows(bool allow_null_columns) const { ssize_t rows = -1; for (const auto & elem : data) { + if (!elem.column && allow_null_columns) + continue; + if (!elem.column) throw Exception("Column " + elem.name + " in block is nullptr, in method checkNumberOfRows." , ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 4a93e5ed803..ae8b07718dd 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -90,7 +90,7 @@ public: size_t columns() const { return data.size(); } /// Checks that every column in block is not nullptr and has same number of elements. - void checkNumberOfRows() const; + void checkNumberOfRows(bool allow_null_columns = false) const; /// Approximate number of bytes in memory - for profiling and limits. size_t bytes() const; diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index a86ea724f7a..9a3633a9790 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -337,19 +337,43 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( size_t num_rows = input_rows_count; ColumnPtr indexes; + /// Find first LowCardinality column and replace it to nested dictionary. 
for (auto arg : args) { ColumnWithTypeAndName & column = block.getByPosition(arg); if (auto * low_cardinality_column = checkAndGetColumn(column.column.get())) { + /// Single LowCardinality column is supported now. if (indexes) throw Exception("Expected single dictionary argument for function.", ErrorCodes::LOGICAL_ERROR); - indexes = low_cardinality_column->getIndexesPtr(); - num_rows = low_cardinality_column->getDictionary().size(); + auto * low_cardinality_type = checkAndGetDataType(column.type.get()); + + if (!low_cardinality_type) + throw Exception("Incompatible type for low cardinality column: " + column.type->getName(), + ErrorCodes::LOGICAL_ERROR); + + if (can_be_executed_on_default_arguments) + { + /// Normal case, when function can be executed on values's default. + column.column = low_cardinality_column->getDictionary().getNestedColumn(); + indexes = low_cardinality_column->getIndexesPtr(); + } + else + { + /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). + /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. + auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); + column.column = dict_encoded.dictionary; + indexes = dict_encoded.indexes; + } + + num_rows = column.column->size(); + column.type = low_cardinality_type->getDictionaryType(); } } + /// Change size of constants. 
for (auto arg : args) { ColumnWithTypeAndName & column = block.getByPosition(arg); @@ -358,26 +382,12 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( column.column = column_const->removeLowCardinality()->cloneResized(num_rows); column.type = removeLowCardinality(column.type); } - else if (auto * low_cardinality_column = checkAndGetColumn(column.column.get())) - { - auto * low_cardinality_type = checkAndGetDataType(column.type.get()); - - if (!low_cardinality_type) - throw Exception("Incompatible type for low cardinality column: " + column.type->getName(), - ErrorCodes::LOGICAL_ERROR); - - if (can_be_executed_on_default_arguments) - column.column = low_cardinality_column->getDictionary().getNestedColumn(); - else - { - auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); - column.column = dict_encoded.dictionary; - indexes = dict_encoded.indexes; - } - column.type = low_cardinality_type->getDictionaryType(); - } } +#ifndef NDEBUG + block.checkNumberOfRows(true); +#endif + return indexes; } From 8e4dd482496062fd5fa74da4ce8d40e5866f4ad8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 24 Sep 2019 02:11:33 +0300 Subject: [PATCH 030/215] Merge pull request #7069 from ClickHouse/compatibility-settings-19.14 Fixed compatibility for distributed queries between 19.14 and earlier versions (cherry picked from commit 575ddefa6c32c692f2db7ee8688ca586f3b1d622) --- dbms/src/Interpreters/ClusterProxy/executeQuery.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp index 989595b3647..9a0494cca45 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -18,8 +18,6 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin { Settings new_settings = settings; 
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); - new_settings.connection_pool_max_wait_ms = Cluster::saturate(new_settings.connection_pool_max_wait_ms, settings.max_execution_time); - new_settings.replace_running_query_max_wait_ms = Cluster::saturate(new_settings.replace_running_query_max_wait_ms, settings.max_execution_time); /// Does not matter on remote servers, because queries are sent under different user. new_settings.max_concurrent_queries_for_user = 0; @@ -39,8 +37,8 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } BlockInputStreams executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, - const ASTPtr & query_ast, const Context & context, const Settings & settings) + IStreamFactory & stream_factory, const ClusterPtr & cluster, + const ASTPtr & query_ast, const Context & context, const Settings & settings) { BlockInputStreams res; From 79b96de31af46895726fc71072fded6263c7f5da Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 24 Sep 2019 05:18:15 +0300 Subject: [PATCH 031/215] Merge pull request #7075 from ClickHouse/revert-index-analysis-slowdown Revert "CHYT-142: extend KeyCondition interface so that it returns BoolMask" (cherry picked from commit a62866918fc333338bc7ac624db1681761fc5029) --- dbms/src/Interpreters/Set.cpp | 2 +- dbms/src/Interpreters/Set.h | 4 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 39 ++++++++++--------- dbms/src/Storages/MergeTree/KeyCondition.h | 14 +++---- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 12 +++--- .../MergeTree/MergeTreeIndexFullText.cpp | 4 +- .../MergeTree/MergeTreeIndexMinMax.cpp | 2 +- 7 files changed, 39 insertions(+), 38 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 4313decd36d..68c219c3a91 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -470,7 +470,7 @@ 
MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector & key_ranges, const DataTypes & data_types) +BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types) { size_t tuple_size = indexes_mapping.size(); diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index a252f1ebc1e..987252e37ba 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -170,7 +170,7 @@ using Sets = std::vector; class IFunction; using FunctionPtr = std::shared_ptr; -/// Class for checkInRange function. +/// Class for mayBeTrueInRange function. class MergeTreeSetIndex { public: @@ -188,7 +188,7 @@ public: size_t size() const { return ordered_set.at(0)->size(); } - BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types); + BoolMask mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types); private: Columns ordered_set; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 0ebe8f79aba..b3e4c776605 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -886,7 +886,7 @@ String KeyCondition::toString() const */ template -static BoolMask forAnyParallelogram( +static bool forAnyParallelogram( size_t key_size, const Field * key_left, const Field * key_right, @@ -942,15 +942,16 @@ static BoolMask forAnyParallelogram( for (size_t i = prefix_size + 1; i < key_size; ++i) parallelogram[i] = Range(); - BoolMask result(false, false); - result = result | callback(parallelogram); + if (callback(parallelogram)) + return true; /// [x1] x [y1 .. 
+inf) if (left_bounded) { parallelogram[prefix_size] = Range(key_left[prefix_size]); - result = result | forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, callback); + if (forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, callback)) + return true; } /// [x2] x (-inf .. y2] @@ -958,14 +959,15 @@ static BoolMask forAnyParallelogram( if (right_bounded) { parallelogram[prefix_size] = Range(key_right[prefix_size]); - result = result | forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, callback); + if (forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, callback)) + return true; } - return result; + return false; } -BoolMask KeyCondition::checkInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_key, const Field * right_key, @@ -991,7 +993,7 @@ BoolMask KeyCondition::checkInRange( return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, [&] (const std::vector & key_ranges_parallelogram) { - auto res = checkInParallelogram(key_ranges_parallelogram, data_types); + auto res = mayBeTrueInParallelogram(key_ranges_parallelogram, data_types); /* std::cerr << "Parallelogram: "; for (size_t i = 0, size = key_ranges.size(); i != size; ++i) @@ -1002,11 +1004,11 @@ BoolMask KeyCondition::checkInRange( }); } - std::optional KeyCondition::applyMonotonicFunctionsChainToRange( Range key_range, MonotonicFunctionsChain & functions, - DataTypePtr current_type) + DataTypePtr current_type +) { for (auto & func : functions) { @@ -1039,7 +1041,7 @@ std::optional KeyCondition::applyMonotonicFunctionsChainToRange( return key_range; } -BoolMask KeyCondition::checkInParallelogram(const std::vector & parallelogram, const DataTypes & data_types) const +bool KeyCondition::mayBeTrueInParallelogram(const std::vector & parallelogram, const DataTypes 
& data_types) const { std::vector rpn_stack; for (size_t i = 0; i < rpn.size(); ++i) @@ -1087,7 +1089,7 @@ BoolMask KeyCondition::checkInParallelogram(const std::vector & parallelo if (!element.set_index) throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); - rpn_stack.emplace_back(element.set_index->checkInRange(parallelogram, data_types)); + rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(parallelogram, data_types)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) rpn_stack.back() = !rpn_stack.back(); } @@ -1122,23 +1124,22 @@ BoolMask KeyCondition::checkInParallelogram(const std::vector & parallelo } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); - return rpn_stack[0]; + return rpn_stack[0].can_be_true; } -BoolMask KeyCondition::checkInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const { - return checkInRange(used_key_size, left_key, right_key, data_types, true); + return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true); } - -BoolMask KeyCondition::getMaskAfter( +bool KeyCondition::mayBeTrueAfter( size_t used_key_size, const Field * left_key, const DataTypes & data_types) const { - return checkInRange(used_key_size, left_key, nullptr, data_types, false); + return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index 2a5c520b243..61989d1b2d9 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -235,17 +235,17 @@ public: const Names & key_column_names, const ExpressionActionsPtr & key_expr); - /// Whether the condition and its negation are 
(independently) feasible in the key range. + /// Whether the condition is feasible in the key range. /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. /// data_types - the types of the key columns. - BoolMask checkInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; + bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; - /// Whether the condition and its negation are feasible in the direct product of single column ranges specified by `parallelogram`. - BoolMask checkInParallelogram(const std::vector & parallelogram, const DataTypes & data_types) const; + /// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`. + bool mayBeTrueInParallelogram(const std::vector & parallelogram, const DataTypes & data_types) const; - /// Are the condition and its negation valid in a semi-infinite (not limited to the right) key range. + /// Is the condition valid in a semi-infinite (not limited to the right) key range. /// left_key must contain all the fields in the sort_descr in the appropriate order. - BoolMask getMaskAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; + bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; /// Checks that the index can not be used. 
bool alwaysUnknownOrTrue() const; @@ -330,7 +330,7 @@ public: static const AtomMap atom_map; private: - BoolMask checkInRange( + bool mayBeTrueInRange( size_t used_key_size, const Field * left_key, const Field * right_key, diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 40dc0bf6b52..99b4a49d111 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -269,8 +269,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( if (part->isEmpty()) continue; - if (minmax_idx_condition && !minmax_idx_condition->checkInParallelogram( - part->minmax_idx.parallelogram, data.minmax_idx_column_types).can_be_true) + if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram( + part->minmax_idx.parallelogram, data.minmax_idx_column_types)) continue; if (max_block_numbers_to_read) @@ -1200,8 +1200,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( for (size_t i = 0; i < used_key_size; ++i) index[i]->get(range.begin, index_left[i]); - may_be_true = key_condition.getMaskAfter( - used_key_size, index_left.data(), data.primary_key_data_types).can_be_true; + may_be_true = key_condition.mayBeTrueAfter( + used_key_size, index_left.data(), data.primary_key_data_types); } else { @@ -1214,8 +1214,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( index[i]->get(range.end, index_right[i]); } - may_be_true = key_condition.checkInRange( - used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types).can_be_true; + may_be_true = key_condition.mayBeTrueInRange( + used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); } if (!may_be_true) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 246ad6784b2..264c91cd890 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -378,11 +378,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in BloomFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); return rpn_stack[0].can_be_true; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 360e69eacc6..37c094db215 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -143,7 +143,7 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr for (const auto & range : granule->parallelogram) if (range.left.isNull() || range.right.isNull()) return true; - return condition.checkInParallelogram(granule->parallelogram, index.data_types).can_be_true; + return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types); } From 269dcad274685a4bda2104036565381801162ffe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 2 Oct 2019 20:29:01 +0300 Subject: [PATCH 032/215] Auto version update to [19.14.7.15] [54425] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 06ee301be79..05acccfe7c3 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54425) set(VERSION_MAJOR 19) set(VERSION_MINOR 14) set(VERSION_PATCH 7) 
-set(VERSION_GITHASH 1989b50166760e2aba201c1aa50f7aebce780cb6) -set(VERSION_DESCRIBE v19.14.7.1-stable) -set(VERSION_STRING 19.14.7.1) +set(VERSION_GITHASH 79b96de31af46895726fc71072fded6263c7f5da) +set(VERSION_DESCRIBE v19.14.7.15-stable) +set(VERSION_STRING 19.14.7.15) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 314144630231b7ad65344d66618646015a47550a Mon Sep 17 00:00:00 2001 From: Clement Rodriguez Date: Tue, 8 Oct 2019 19:27:00 +0200 Subject: [PATCH 033/215] Implemented MySQL connection mutualization for external MySQL dictionaries --- .../Dictionaries/MySQLDictionarySource.cpp | 4 +- .../src/Interpreters/ExternalDictionaries.cpp | 20 +++ dbms/src/Interpreters/ExternalDictionaries.h | 9 +- libs/libmysqlxx/CMakeLists.txt | 2 + libs/libmysqlxx/include/mysqlxx/PoolFactory.h | 51 +++++++ .../include/mysqlxx/PoolWithFailover.h | 4 + libs/libmysqlxx/src/PoolFactory.cpp | 136 ++++++++++++++++++ libs/libmysqlxx/src/PoolWithFailover.cpp | 21 ++- 8 files changed, 237 insertions(+), 10 deletions(-) create mode 100644 libs/libmysqlxx/include/mysqlxx/PoolFactory.h create mode 100644 libs/libmysqlxx/src/PoolFactory.cpp diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.cpp b/dbms/src/Dictionaries/MySQLDictionarySource.cpp index 497448bf64c..38ddd6df921 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.cpp +++ b/dbms/src/Dictionaries/MySQLDictionarySource.cpp @@ -6,7 +6,6 @@ #include "DictionaryStructure.h" - namespace DB { namespace ErrorCodes @@ -47,6 +46,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include "readInvalidateQuery.h" +# include namespace DB { @@ -67,7 +67,7 @@ MySQLDictionarySource::MySQLDictionarySource( , update_field{config.getString(config_prefix + ".update_field", "")} , dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)} , sample_block{sample_block_} - , pool{config, config_prefix} + , pool{mysqlxx::PoolFactory::instance().Get(config, 
config_prefix)} , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index e1cbd377978..1281ebd49af 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -2,6 +2,10 @@ #include #include +#if USE_MYSQL +# include +#endif + namespace DB { @@ -27,4 +31,20 @@ ExternalLoader::LoadablePtr ExternalDictionaries::create( return DictionaryFactory::instance().create(name, config, key_in_config, context); } +void ExternalDictionaries::reload(const String & name, bool load_never_loading) +{ + #if USE_MYSQL + mysqlxx::PoolFactory::instance().reset(); + #endif + ExternalLoader::reload(name, load_never_loading); +} + +void ExternalDictionaries::reload(bool load_never_loading) +{ + #if USE_MYSQL + mysqlxx::PoolFactory::instance().reset(); + #endif + ExternalLoader::reload(load_never_loading); +} + } diff --git a/dbms/src/Interpreters/ExternalDictionaries.h b/dbms/src/Interpreters/ExternalDictionaries.h index c071349cc97..5265c83379b 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.h +++ b/dbms/src/Interpreters/ExternalDictionaries.h @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -33,6 +32,14 @@ public: return std::static_pointer_cast(tryGetLoadable(name)); } + /// Override ExternalLoader::reload to reset mysqlxx::PoolFactory.h + /// since connection parameters might have changed. Inherited method is called afterward + void reload(const String & name, bool load_never_loading = false); + + /// Override ExternalLoader::reload to reset mysqlxx::PoolFactory.h + /// since connection parameters might have changed. 
Inherited method is called afterward + void reload(bool load_never_loading = false); + protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config) const override; diff --git a/libs/libmysqlxx/CMakeLists.txt b/libs/libmysqlxx/CMakeLists.txt index 263a031d7b0..4315c71fab3 100644 --- a/libs/libmysqlxx/CMakeLists.txt +++ b/libs/libmysqlxx/CMakeLists.txt @@ -8,6 +8,7 @@ add_library (mysqlxx src/Row.cpp src/Value.cpp src/Pool.cpp + src/PoolFactory.cpp src/PoolWithFailover.cpp include/mysqlxx/Connection.h @@ -15,6 +16,7 @@ add_library (mysqlxx include/mysqlxx/mysqlxx.h include/mysqlxx/Null.h include/mysqlxx/Pool.h + include/mysqlxx/PoolFactory.h include/mysqlxx/PoolWithFailover.h include/mysqlxx/Query.h include/mysqlxx/ResultBase.h diff --git a/libs/libmysqlxx/include/mysqlxx/PoolFactory.h b/libs/libmysqlxx/include/mysqlxx/PoolFactory.h new file mode 100644 index 00000000000..3c553b8b6da --- /dev/null +++ b/libs/libmysqlxx/include/mysqlxx/PoolFactory.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include "PoolWithFailover.h" + +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 + +namespace mysqlxx +{ +/* + * PoolFactory.h + * This class is a helper singleton to mutualize connections to MySQL. + */ +class PoolFactory final : private boost::noncopyable +{ +public: + static PoolFactory & instance(); + + PoolFactory(const PoolFactory &) = delete; + + /** Allocates a PoolWithFailover to connect to MySQL. 
*/ + PoolWithFailover Get(const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + /** Allocates a PoolWithFailover to connect to MySQL. */ + PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + void reset(); + + + ~PoolFactory() = default; + PoolFactory& operator=(const PoolFactory &) = delete; + +private: + PoolFactory(); + + struct Impl; + std::unique_ptr impl; +}; + +} diff --git a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h index 21b27ebd4fe..af59b705a61 100644 --- a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h +++ b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h @@ -77,6 +77,10 @@ namespace mysqlxx size_t max_tries; /// Mutex for set of replicas. std::mutex mutex; + std::string config_name; + + /// Can the Pool be shared + bool shareable; public: using Entry = Pool::Entry; diff --git a/libs/libmysqlxx/src/PoolFactory.cpp b/libs/libmysqlxx/src/PoolFactory.cpp new file mode 100644 index 00000000000..0e6244c333e --- /dev/null +++ b/libs/libmysqlxx/src/PoolFactory.cpp @@ -0,0 +1,136 @@ +#include +#include +#include + +namespace mysqlxx +{ + +struct PoolFactory::Impl +{ + // Cache of already affected pools identified by their config name + std::map> pools; + + // Cache of Pool ID (host + port + user +...) 
cibling already established shareable pool + std::map pools_by_ids; + + /// Protect pools and pools_by_ids caches + std::mutex mutex; +}; + +PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections, + unsigned max_connections, size_t max_tries) +{ + return Get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries); +} + +/// Duplicate of code from StringUtils.h. Copied here for less dependencies. +static bool startsWith(const std::string & s, const char * prefix) +{ + return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); +} + +static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name) +{ + bool shared = config.getBool(config_name + ".share_connection", false); + + // Not shared no need to generate a name the pool won't be stored + if (!shared) + return ""; + + std::string entry_name = ""; + std::string host = config.getString(config_name + ".host", ""); + std::string port = config.getString(config_name + ".port", ""); + std::string user = config.getString(config_name + ".user", ""); + std::string db = config.getString(config_name + ".db", ""); + std::string table = config.getString(config_name + ".table", ""); + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + if (config.has(config_name + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys replica_keys; + config.keys(config_name, replica_keys); + for (const auto & replica_config_key : replica_keys) + { + /// There could be another elements in the same level in configuration file, like "user", "port"... + if (startsWith(replica_config_key, "replica")) + { + std::string replica_name = config_name + "." 
+ replica_config_key; + std::string tmp_host = config.getString(replica_name + ".host", host); + std::string tmp_port = config.getString(replica_name + ".port", port); + std::string tmp_user = config.getString(replica_name + ".user", user); + entry_name += (entry_name.empty() ? "" : "|") + tmp_user + "@" + tmp_host + ":" + tmp_port + "/" + db; + } + } + } + else + { + entry_name = user + "@" + host + ":" + port + "/" + db; + } + return entry_name; +} + +PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries) +{ + + std::lock_guard lock(impl->mutex); + Poco::Util::Application & app = Poco::Util::Application::instance(); + app.logger().warning("Config name=" + config_name); + if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) + { + app.logger().warning("Entry found=" + config_name); + return *(entry->second.get()); + } + else + { + app.logger().warning("Searching confg=" + config_name); + std::string entry_name = getPoolEntryName(config, config_name); + app.logger().warning("Entry name created=" + entry_name); + if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) + { + app.logger().warning("found"); + entry = impl->pools.find(id->second); + std::shared_ptr pool = entry->second; + impl->pools.insert_or_assign(config_name, pool); + app.logger().warning("found OK"); + return *pool; + } + + app.logger().warning("make pool"); + auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); + app.logger().warning("make pool OK"); + // Check the pool will be shared + if (!entry_name.empty()) + { + // Store shared pool + app.logger().warning("store"); + impl->pools.insert_or_assign(config_name, pool); + impl->pools_by_ids.insert_or_assign(entry_name, config_name); + app.logger().warning("store OK"); + } + app.logger().warning("a2"); + auto a2 = 
*(pool.get()); + app.logger().warning("a2 OK"); + return *(pool.get()); + } +} + +void PoolFactory::reset() +{ + std::lock_guard lock(impl->mutex); + impl->pools.clear(); + impl->pools_by_ids.clear(); +} + +PoolFactory::PoolFactory() : impl(std::make_unique()) {} + +PoolFactory & PoolFactory::instance() +{ + static PoolFactory ret; + return ret; +} + +} diff --git a/libs/libmysqlxx/src/PoolWithFailover.cpp b/libs/libmysqlxx/src/PoolWithFailover.cpp index dd89f1596d3..bcdbcb3df72 100644 --- a/libs/libmysqlxx/src/PoolWithFailover.cpp +++ b/libs/libmysqlxx/src/PoolWithFailover.cpp @@ -48,15 +48,22 @@ PoolWithFailover::PoolWithFailover(const std::string & config_name, const unsign {} PoolWithFailover::PoolWithFailover(const PoolWithFailover & other) - : max_tries{other.max_tries} + : max_tries{other.max_tries}, config_name{other.config_name} { - for (const auto & priority_replicas : other.replicas_by_priority) + if (shareable) { - Replicas replicas; - replicas.reserve(priority_replicas.second.size()); - for (const auto & pool : priority_replicas.second) - replicas.emplace_back(std::make_shared(*pool)); - replicas_by_priority.emplace(priority_replicas.first, std::move(replicas)); + replicas_by_priority = other.replicas_by_priority; + } + else + { + for (const auto & priority_replicas : other.replicas_by_priority) + { + Replicas replicas; + replicas.reserve(priority_replicas.second.size()); + for (const auto & pool : priority_replicas.second) + replicas.emplace_back(std::make_shared(*pool)); + replicas_by_priority.emplace(priority_replicas.first, std::move(replicas)); + } } } From 1a9b7b97afe363ef492f0084d649d6691ae20127 Mon Sep 17 00:00:00 2001 From: Clement Rodriguez Date: Wed, 9 Oct 2019 15:47:09 +0200 Subject: [PATCH 034/215] Fixed PoolWithFailOver --- libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h 
b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h index af59b705a61..d3cf5ae661a 100644 --- a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h +++ b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h @@ -104,7 +104,7 @@ namespace mysqlxx PoolWithFailover(const PoolWithFailover & other); - PoolWithFailover & operator=(const PoolWithFailover &) = delete; +// PoolWithFailover & operator=(const PoolWithFailover &) = delete; /** Allocates a connection to use. */ Entry Get(); From 72427b06836e2ff07a82016f8a8d69ce8f34f6bf Mon Sep 17 00:00:00 2001 From: Clement Rodriguez Date: Thu, 10 Oct 2019 18:10:46 +0200 Subject: [PATCH 035/215] Mutualization of MySQL connection + integration tests --- .../ExternalDictionariesLoader.cpp | 4 +- .../test_dictionaries_mysql/__init__.py | 0 .../configs/config.xml | 30 +++++ .../configs/dictionaries/mysql_dict1.xml | 39 ++++++ .../configs/dictionaries/mysql_dict2.xml | 113 ++++++++++++++++++ .../test_dictionaries_mysql/configs/users.xml | 23 ++++ .../test_dictionaries_mysql/test.py | 89 ++++++++++++++ .../include/mysqlxx/PoolWithFailover.h | 2 - libs/libmysqlxx/src/PoolFactory.cpp | 13 -- 9 files changed, 296 insertions(+), 17 deletions(-) create mode 100644 dbms/tests/integration/test_dictionaries_mysql/__init__.py create mode 100644 dbms/tests/integration/test_dictionaries_mysql/configs/config.xml create mode 100644 dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml create mode 100644 dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml create mode 100644 dbms/tests/integration/test_dictionaries_mysql/configs/users.xml create mode 100644 dbms/tests/integration/test_dictionaries_mysql/test.py diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index eb387b8842e..6bbe7c0d999 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ 
-27,7 +27,7 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( return DictionaryFactory::instance().create(name, config, key_in_config, context); } -void ExternalDictionaries::reload(const String & name, bool load_never_loading) +void ExternalDictionariesLoader::reload(const String & name, bool load_never_loading) { #if USE_MYSQL mysqlxx::PoolFactory::instance().reset(); @@ -35,7 +35,7 @@ void ExternalDictionaries::reload(const String & name, bool load_never_loading) ExternalLoader::reload(name, load_never_loading); } -void ExternalDictionaries::reload(bool load_never_loading) +void ExternalDictionariesLoader::reload(bool load_never_loading) { #if USE_MYSQL mysqlxx::PoolFactory::instance().reset(); diff --git a/dbms/tests/integration/test_dictionaries_mysql/__init__.py b/dbms/tests/integration/test_dictionaries_mysql/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/config.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/config.xml new file mode 100644 index 00000000000..b60daf72dcf --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/config.xml @@ -0,0 +1,30 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml new file mode 100644 index 00000000000..0a3a613dfdc --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml @@ -0,0 +1,39 @@ + + + + dict1 + + + test + mysql1 + 3306 + root + clickhouse + test1
+ true + true +
+ + + + + + + id + UInt32 + CAST(id AS UNSIGNED) + + + id + Int32 + + + + value + String + (UNDEFINED) + + + 0 +
+
diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml new file mode 100644 index 00000000000..024d13b14b2 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml @@ -0,0 +1,113 @@ + + + + dict2 + + + test + mysql1 + 3306 + root + clickhouse + test2
+ true + true +
+ + + + + + + id + UInt32 + CAST(id AS UNSIGNED) + + + id + Int32 + + + + value + String + (UNDEFINED) + + + 0 +
+ + + dict3 + + + test + mysql1 + 3306 + root + clickhouse + test2
+ true + true +
+ + + + + + + id + UInt32 + CAST(id AS UNSIGNED) + + + id + Int32 + + + + value + String + (UNDEFINED) + + + 0 +
+ + dict4 + + + test + mysql1 + 3306 + root + clickhouse + test2
+ true + true +
+ + + + + + + id + UInt32 + CAST(id AS UNSIGNED) + + + id + Int32 + + + + value + String + (UNDEFINED) + + + 0 +
+ +
diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/users.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_dictionaries_mysql/test.py b/dbms/tests/integration/test_dictionaries_mysql/test.py new file mode 100644 index 00000000000..52f82cc9b39 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/test.py @@ -0,0 +1,89 @@ +import pytest +import os +import time + +## sudo -H pip install PyMySQL +import pymysql.cursors + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DICTIONARY_FILES = ['configs/dictionaries/mysql_dict1.xml', 'configs/dictionaries/mysql_dict2.xml'] + +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) +instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES) + +create_table_mysql_template = """ + CREATE TABLE `test`.`{}` ( + `id` int(11) NOT NULL, + `value` varchar(50) NOT NULL, + PRIMARY KEY (`id`) + ) ENGINE=InnoDB; + """ + +create_clickhouse_dictionary_table_template = """ + CREATE TABLE `test`.`{}` (`id` Int32, `value` String) ENGINE = Dictionary({}) + ORDER BY `id` DESC SETTINGS index_granularity = 8192 + """ + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + instance.query("CREATE DATABASE IF NOT EXISTS test") + + # Create a MySQL database + create_mysql_db(get_mysql_conn(), 'test') + instance.query("CREATE DATABASE clickhouse_mysql ENGINE = MySQL('mysql1:3306', 'test', 'root', 'clickhouse')") + + yield cluster + + finally: + cluster.shutdown() + + +def test_load_mysql_dictionaries(started_cluster): + # Load dictionaries + query = 
instance.query + query("SYSTEM RELOAD DICTIONARIES") + + for n in range(0, 5): + # Create MySQL tables and fills them + prepare_mysql_table('test' + n) + + #Create Dictionary tables based on MySQL tables + query(create_clickhouse_dictionary_table_template.format('test' + n), 'dict' + n) + + # Check dictionaries are loaded and have correct number of elements + for n in range(0, 100): + if (n % 10) == 0: + # Force reload of dictionaries + query("SYSTEM RELOAD DICTIONARIES") + assert query("SELECT count() FROM `test`.{}".format('test' + (n % 5))).rstrip() == '10000' + +def create_mysql_db(mysql_connection, name): + with mysql_connection.cursor() as cursor: + cursor.execute( + "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) + +def prepare_mysql_table(table_name): + mysql_connection = get_mysql_conn() + + # Create table + create_mysql_table(mysql_connection, table_name) + + # Insert rows using CH + query = instance.query + query("INSERT INTO `clickhouse_mysql`.{}(id, value) select number, concat('{} value ', toString(number)) from numbers(10000) ".format(table_name, table_name)) + assert query("SELECT count() FROM `clickhouse_mysql`.{}".format(table_name)).rstrip() == '10000' + mysql_connection.close() + +def get_mysql_conn(): + conn = pymysql.connect(user='root', password='clickhouse', host='mysql1', port=3308) + return conn + +def create_mysql_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_mysql_template.format(table_name)) + diff --git a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h index d3cf5ae661a..fe151240fa5 100644 --- a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h +++ b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h @@ -104,8 +104,6 @@ namespace mysqlxx PoolWithFailover(const PoolWithFailover & other); -// PoolWithFailover & operator=(const PoolWithFailover &) = delete; - /** Allocates a connection to use. 
*/ Entry Get(); }; diff --git a/libs/libmysqlxx/src/PoolFactory.cpp b/libs/libmysqlxx/src/PoolFactory.cpp index 0e6244c333e..e721fe5027d 100644 --- a/libs/libmysqlxx/src/PoolFactory.cpp +++ b/libs/libmysqlxx/src/PoolFactory.cpp @@ -77,43 +77,30 @@ PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & conf { std::lock_guard lock(impl->mutex); - Poco::Util::Application & app = Poco::Util::Application::instance(); - app.logger().warning("Config name=" + config_name); if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) { - app.logger().warning("Entry found=" + config_name); return *(entry->second.get()); } else { - app.logger().warning("Searching confg=" + config_name); std::string entry_name = getPoolEntryName(config, config_name); - app.logger().warning("Entry name created=" + entry_name); if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) { - app.logger().warning("found"); entry = impl->pools.find(id->second); std::shared_ptr pool = entry->second; impl->pools.insert_or_assign(config_name, pool); - app.logger().warning("found OK"); return *pool; } - app.logger().warning("make pool"); auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); - app.logger().warning("make pool OK"); // Check the pool will be shared if (!entry_name.empty()) { // Store shared pool - app.logger().warning("store"); impl->pools.insert_or_assign(config_name, pool); impl->pools_by_ids.insert_or_assign(entry_name, config_name); - app.logger().warning("store OK"); } - app.logger().warning("a2"); auto a2 = *(pool.get()); - app.logger().warning("a2 OK"); return *(pool.get()); } } From 21a252f0ceb4bfc85e0b1aefbedaced9911a0262 Mon Sep 17 00:00:00 2001 From: Clement Rodriguez Date: Fri, 11 Oct 2019 09:25:22 +0200 Subject: [PATCH 036/215] Fixed integration test --- .../configs/dictionaries/mysql_dict2.xml | 4 +-- .../test_dictionaries_mysql/test.py | 26 ++++++++++++------- 2 files 
changed, 18 insertions(+), 12 deletions(-) diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml index 024d13b14b2..91506481cc9 100644 --- a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml @@ -46,7 +46,7 @@ 3306 root clickhouse - test2
+ test3
true true @@ -82,7 +82,7 @@ 3306 root clickhouse - test2
+ test4
true true diff --git a/dbms/tests/integration/test_dictionaries_mysql/test.py b/dbms/tests/integration/test_dictionaries_mysql/test.py index 52f82cc9b39..78aeb747dba 100644 --- a/dbms/tests/integration/test_dictionaries_mysql/test.py +++ b/dbms/tests/integration/test_dictionaries_mysql/test.py @@ -23,7 +23,7 @@ create_table_mysql_template = """ """ create_clickhouse_dictionary_table_template = """ - CREATE TABLE `test`.`{}` (`id` Int32, `value` String) ENGINE = Dictionary({}) + CREATE TABLE `test`.`dict_table_{}` (`id` Int32, `value` String) ENGINE = Dictionary({}) ORDER BY `id` DESC SETTINGS index_granularity = 8192 """ @@ -31,10 +31,14 @@ create_clickhouse_dictionary_table_template = """ def started_cluster(): try: cluster.start() - instance.query("CREATE DATABASE IF NOT EXISTS test") - + # Create a MySQL database create_mysql_db(get_mysql_conn(), 'test') + + # Create database in ClickHouse + instance.query("CREATE DATABASE IF NOT EXISTS test") + + # Create database in ClickChouse using MySQL protocol (will be used for data insertion) instance.query("CREATE DATABASE clickhouse_mysql ENGINE = MySQL('mysql1:3306', 'test', 'root', 'clickhouse')") yield cluster @@ -49,18 +53,17 @@ def test_load_mysql_dictionaries(started_cluster): query("SYSTEM RELOAD DICTIONARIES") for n in range(0, 5): - # Create MySQL tables and fills them - prepare_mysql_table('test' + n) + # Create MySQL tables, fill them and create CH dict tables + prepare_tables('test' + n) - #Create Dictionary tables based on MySQL tables - query(create_clickhouse_dictionary_table_template.format('test' + n), 'dict' + n) - # Check dictionaries are loaded and have correct number of elements for n in range(0, 100): + # Force reload of dictionaries (each 10 iteration) if (n % 10) == 0: - # Force reload of dictionaries query("SYSTEM RELOAD DICTIONARIES") - assert query("SELECT count() FROM `test`.{}".format('test' + (n % 5))).rstrip() == '10000' + + # Check number of rows + assert query("SELECT count() FROM 
`dict_table_`.{}".format('test' + (n % 5))).rstrip() == '10000' def create_mysql_db(mysql_connection, name): with mysql_connection.cursor() as cursor: @@ -78,6 +81,9 @@ def prepare_mysql_table(table_name): query("INSERT INTO `clickhouse_mysql`.{}(id, value) select number, concat('{} value ', toString(number)) from numbers(10000) ".format(table_name, table_name)) assert query("SELECT count() FROM `clickhouse_mysql`.{}".format(table_name)).rstrip() == '10000' mysql_connection.close() + + #Create CH Dictionary tables based on MySQL tables + query(create_clickhouse_dictionary_table_template.format(table_name + n), 'dict' + n) def get_mysql_conn(): conn = pymysql.connect(user='root', password='clickhouse', host='mysql1', port=3308) From 3407d99ce365139fbf86c11e31c3d257687279c0 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 30 Jan 2020 23:12:00 +0300 Subject: [PATCH 037/215] Added draft version of random table function. Currently unimplemented data generators --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 ++ .../src/DataTypes/DataTypeAggregateFunction.h | 1 + dbms/src/DataTypes/DataTypeArray.cpp | 7 ++ dbms/src/DataTypes/DataTypeArray.h | 1 + dbms/src/DataTypes/DataTypeDecimalBase.cpp | 7 ++ dbms/src/DataTypes/DataTypeDecimalBase.h | 1 + dbms/src/DataTypes/DataTypeEnum.h | 1 + dbms/src/DataTypes/DataTypeFixedString.cpp | 6 ++ dbms/src/DataTypes/DataTypeFixedString.h | 1 + dbms/src/DataTypes/DataTypeLowCardinality.cpp | 6 ++ dbms/src/DataTypes/DataTypeLowCardinality.h | 1 + dbms/src/DataTypes/DataTypeNothing.cpp | 8 +++ dbms/src/DataTypes/DataTypeNothing.h | 1 + dbms/src/DataTypes/DataTypeNullable.cpp | 5 ++ dbms/src/DataTypes/DataTypeNullable.h | 1 + dbms/src/DataTypes/DataTypeNumberBase.cpp | 7 ++ dbms/src/DataTypes/DataTypeNumberBase.h | 1 + dbms/src/DataTypes/DataTypeSet.h | 1 + dbms/src/DataTypes/DataTypeString.cpp | 5 ++ dbms/src/DataTypes/DataTypeString.h | 1 + dbms/src/DataTypes/DataTypeTuple.cpp | 8 +++ 
dbms/src/DataTypes/DataTypeTuple.h | 1 + dbms/src/DataTypes/IDataType.h | 4 ++ dbms/src/DataTypes/IDataTypeDummy.h | 5 ++ .../TableFunctions/TableFunctionRandom.cpp | 69 +++++++++++++++++++ dbms/src/TableFunctions/TableFunctionRandom.h | 20 ++++++ .../TableFunctions/registerTableFunctions.cpp | 1 + .../TableFunctions/registerTableFunctions.h | 1 + .../01072_random_table_function.sql | 1 + 29 files changed, 176 insertions(+) create mode 100644 dbms/src/TableFunctions/TableFunctionRandom.cpp create mode 100644 dbms/src/TableFunctions/TableFunctionRandom.h create mode 100644 dbms/tests/queries/0_stateless/01072_random_table_function.sql diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 8111b1de2fe..f3b26497912 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -304,6 +304,10 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const return ColumnAggregateFunction::create(function); } +MutableColumnPtr DataTypeAggregateFunction::createColumnWithRandomData(size_t) const +{ + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} /// Create empty state Field DataTypeAggregateFunction::getDefault() const diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index 9ae7c67a803..e4c226b2917 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -63,6 +63,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index 
e2c03805ea8..0500182c61a 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -487,6 +487,13 @@ MutableColumnPtr DataTypeArray::createColumn() const } +MutableColumnPtr DataTypeArray::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + Field DataTypeArray::getDefault() const { return Array(); diff --git a/dbms/src/DataTypes/DataTypeArray.h b/dbms/src/DataTypes/DataTypeArray.h index 1451f27dfbe..ccf269bd357 100644 --- a/dbms/src/DataTypes/DataTypeArray.h +++ b/dbms/src/DataTypes/DataTypeArray.h @@ -94,6 +94,7 @@ public: bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.cpp b/dbms/src/DataTypes/DataTypeDecimalBase.cpp index 7b9a391427c..a0f2bd7bd82 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.cpp +++ b/dbms/src/DataTypes/DataTypeDecimalBase.cpp @@ -41,6 +41,13 @@ MutableColumnPtr DataTypeDecimalBase::createColumn() const return ColumnType::create(0, scale); } +template +MutableColumnPtr DataTypeDecimalBase::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + template void DataTypeDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const { diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h index 11f7490e80a..d579b965412 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.h +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -83,6 +83,7 @@ public: Field getDefault() const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr 
createColumnWithRandomData(size_t) const override; bool isParametric() const override { return true; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index 2cb677984df..a0408df0279 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -111,6 +111,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override { return ColumnType::create(); } + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index d30f1003ca0..a148d0b2d22 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -268,6 +268,12 @@ MutableColumnPtr DataTypeFixedString::createColumn() const return ColumnFixedString::create(n); } +MutableColumnPtr DataTypeFixedString::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + Field DataTypeFixedString::getDefault() const { return String(); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 6d1f1c4db83..4f264d3ac86 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -70,6 +70,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git 
a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 5db32bd5380..24dc3af48c9 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -934,6 +934,12 @@ MutableColumnPtr DataTypeLowCardinality::createColumn() const return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes)); } +MutableColumnPtr DataTypeLowCardinality::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + Field DataTypeLowCardinality::getDefault() const { return dictionary_type->getDefault(); diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index f8c314909b8..9b22acea7e3 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -68,6 +68,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNothing.cpp b/dbms/src/DataTypes/DataTypeNothing.cpp index 79fbb002bff..ce4990748f9 100644 --- a/dbms/src/DataTypes/DataTypeNothing.cpp +++ b/dbms/src/DataTypes/DataTypeNothing.cpp @@ -14,6 +14,14 @@ MutableColumnPtr DataTypeNothing::createColumn() const return ColumnNothing::create(0); } + +MutableColumnPtr DataTypeNothing::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + void DataTypeNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { size_t size = 
column.size(); diff --git a/dbms/src/DataTypes/DataTypeNothing.h b/dbms/src/DataTypes/DataTypeNothing.h index e9421fb15e8..5fbe0acc0a9 100644 --- a/dbms/src/DataTypes/DataTypeNothing.h +++ b/dbms/src/DataTypes/DataTypeNothing.h @@ -19,6 +19,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Nothing; } MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; /// These methods read and write zero bytes just to allow to figure out size of column. void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 397d5ba0a65..6f31e66a1e5 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -488,6 +488,11 @@ MutableColumnPtr DataTypeNullable::createColumn() const return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create()); } +MutableColumnPtr DataTypeNullable::createColumnWithRandomData(size_t limit) const +{ + return ColumnNullable::create(nested_data_type->createColumnWithRandomData(limit), DataTypeUInt8().createColumnWithRandomData(limit)); +} + Field DataTypeNullable::getDefault() const { return Null(); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 1766b399c2a..83a76ae0410 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -76,6 +76,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 90356817730..937967d431a 100644 --- 
a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -239,6 +239,13 @@ MutableColumnPtr DataTypeNumberBase::createColumn() const return ColumnVector::create(); } +template +MutableColumnPtr DataTypeNumberBase::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + template bool DataTypeNumberBase::isValueRepresentedByInteger() const { diff --git a/dbms/src/DataTypes/DataTypeNumberBase.h b/dbms/src/DataTypes/DataTypeNumberBase.h index fb752ad5329..5a3dda5fe15 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.h +++ b/dbms/src/DataTypes/DataTypeNumberBase.h @@ -45,6 +45,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return false; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index 7ef0d931279..1d0d56c164b 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,6 +21,7 @@ public: // Used for expressions analysis. 
MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } + MutableColumnPtr createColumnWithRandomData(size_t) const override; // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index ef32fe33690..46478396a68 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -360,6 +360,11 @@ MutableColumnPtr DataTypeString::createColumn() const return ColumnString::create(); } +MutableColumnPtr DataTypeString::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} bool DataTypeString::equals(const IDataType & rhs) const { diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 28968eef3f1..4a2c6be42e1 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -54,6 +54,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 4d60177aa4d..5c912b89f2d 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -454,6 +454,14 @@ MutableColumnPtr DataTypeTuple::createColumn() const return ColumnTuple::create(std::move(tuple_columns)); } + +MutableColumnPtr DataTypeTuple::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + Field 
DataTypeTuple::getDefault() const { return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); diff --git a/dbms/src/DataTypes/DataTypeTuple.h b/dbms/src/DataTypes/DataTypeTuple.h index 06f0f62026e..a3a8fb2847e 100644 --- a/dbms/src/DataTypes/DataTypeTuple.h +++ b/dbms/src/DataTypes/DataTypeTuple.h @@ -81,6 +81,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & reader, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 92d0c1057c5..04ad5896154 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -287,6 +287,10 @@ public: */ virtual MutableColumnPtr createColumn() const = 0; + /** Create column for corresponding type and fill with random values. + */ + virtual MutableColumnPtr createColumnWithRandomData(size_t size) const = 0; + /** Create ColumnConst for corresponding type, with specified size and value. 
*/ ColumnPtr createColumnConst(size_t size, const Field & field) const; diff --git a/dbms/src/DataTypes/IDataTypeDummy.h b/dbms/src/DataTypes/IDataTypeDummy.h index f27359e5f74..e346689274f 100644 --- a/dbms/src/DataTypes/IDataTypeDummy.h +++ b/dbms/src/DataTypes/IDataTypeDummy.h @@ -42,6 +42,11 @@ public: throw Exception("Method createColumn() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + MutableColumnPtr createColumnWithRandomData(size_t) const override + { + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + Field getDefault() const override { throw Exception("Method getDefault() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp new file mode 100644 index 00000000000..f7ffe977698 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -0,0 +1,69 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + + ASTs & args = args_func.at(0)->children; + + if (args.size() > 2) + throw Exception("Table function '" + getName() + "' requires one or two arguments: structure (and limit).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// Parsing first argument as table structure and creating a sample block + std::string structure = 
args[0]->as().value.safeGet(); + + UInt64 limit = 1; + /// Parsing second argument if present + if (args.size() == 2) + limit = args[1]->as().value.safeGet(); + + if (!limit) + throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); + + ColumnsDescription columns = parseColumnsListFromString(structure, context); + + Block res_block; + for (const auto & name_type : columns.getOrdinary()) + Column c = name_type.type->createColumnWithRandomData(limit) ; + res_block.insert({ c, name_type.type, name_type.name }); + + auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); + res->startup(); + return res; +} + +void registerTableFunctionRandom(TableFunctionFactory & factory) +{ + factory.registerFunction(TableFunctionFactory::CaseInsensitive); +} + +} diff --git a/dbms/src/TableFunctions/TableFunctionRandom.h b/dbms/src/TableFunctions/TableFunctionRandom.h new file mode 100644 index 00000000000..c4f8e2bca37 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionRandom.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ +/* random(structure, limit) - creates a temporary storage filling columns with random data + * random is case-insensitive table function + */ +class TableFunctionRandom : public ITableFunction +{ +public: + static constexpr auto name = "generate"; + std::string getName() const override { return name; } +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override; +}; + + +} diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 35021cd46d0..91b6b94440c 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -15,6 +15,7 @@ void registerTableFunctions() registerTableFunctionURL(factory); registerTableFunctionValues(factory); 
registerTableFunctionInput(factory); + registerTableFunctionRandom(factory); #if USE_AWS_S3 registerTableFunctionS3(factory); diff --git a/dbms/src/TableFunctions/registerTableFunctions.h b/dbms/src/TableFunctions/registerTableFunctions.h index 66f2dda90ea..8ae5ab339f4 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.h +++ b/dbms/src/TableFunctions/registerTableFunctions.h @@ -12,6 +12,7 @@ void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); void registerTableFunctionValues(TableFunctionFactory & factory); void registerTableFunctionInput(TableFunctionFactory & factory); +void registerTableFunctionRandom(TableFunctionFactory & factory); #if USE_AWS_S3 void registerTableFunctionS3(TableFunctionFactory & factory); diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql new file mode 100644 index 00000000000..fb217befea5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -0,0 +1 @@ +SELECT * FROM random(3) From 3f4db956ca7e305378c22e3df3bbcbeb4a37c988 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 31 Jan 2020 09:36:29 +0300 Subject: [PATCH 038/215] fix --- dbms/src/TableFunctions/TableFunctionRandom.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index f7ffe977698..b68bde17550 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "registerTableFunctions.h" @@ -44,7 +45,7 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C UInt64 limit = 1; /// Parsing second argument if present if (args.size() == 2) - limit = 
args[1]->as().value.safeGet(); + limit = args[1]->as().value.safeGet(); if (!limit) throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); @@ -53,8 +54,8 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) - Column c = name_type.type->createColumnWithRandomData(limit) ; - res_block.insert({ c, name_type.type, name_type.name }); + MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); + res_block.insert({ column, name_type.type, name_type.name }); auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); From 5717d233bdf8ff7dae7e546f1791bfee591f3ac3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 31 Jan 2020 10:58:41 +0300 Subject: [PATCH 039/215] build fix --- dbms/src/DataTypes/DataTypeEnum.cpp | 8 ++++++++ dbms/src/DataTypes/DataTypeSet.h | 5 ++++- dbms/src/TableFunctions/TableFunctionRandom.cpp | 4 +++- .../queries/0_stateless/01072_random_table_function.sql | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 5ca6296f43d..fcaf9f2c8a3 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -347,6 +347,14 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const } +template +MutableColumnPtr DataTypeEnum::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + /// Explicit instantiations. 
template class DataTypeEnum; template class DataTypeEnum; diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index 1d0d56c164b..f468881cfe9 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,7 +21,10 @@ public: // Used for expressions analysis. MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } - MutableColumnPtr createColumnWithRandomData(size_t) const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override + { + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index b68bde17550..3d4bb1d3247 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -54,8 +54,10 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) + { MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); - res_block.insert({ column, name_type.type, name_type.name }); + res_block.insert({std::move(column), name_type.type, name_type.name}); + } auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index fb217befea5..21f0925439d 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -1 +1 @@ -SELECT * FROM random(3) +SELECT * FROM generate('id int', 3) From 568893958b6abbfd5d414109025b6aa797c124ad Mon Sep 17 
00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Sun, 2 Feb 2020 00:22:00 +0300 Subject: [PATCH 040/215] implemented couple of generators --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 - .../src/DataTypes/DataTypeAggregateFunction.h | 1 - dbms/src/DataTypes/DataTypeArray.cpp | 7 - dbms/src/DataTypes/DataTypeArray.h | 1 - dbms/src/DataTypes/DataTypeDecimalBase.cpp | 7 - dbms/src/DataTypes/DataTypeDecimalBase.h | 1 - dbms/src/DataTypes/DataTypeEnum.cpp | 8 - dbms/src/DataTypes/DataTypeEnum.h | 1 - dbms/src/DataTypes/DataTypeFixedString.cpp | 6 - dbms/src/DataTypes/DataTypeFixedString.h | 1 - dbms/src/DataTypes/DataTypeInterval.h | 2 +- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 6 - dbms/src/DataTypes/DataTypeLowCardinality.h | 1 - dbms/src/DataTypes/DataTypeNothing.cpp | 8 - dbms/src/DataTypes/DataTypeNothing.h | 1 - dbms/src/DataTypes/DataTypeNullable.cpp | 5 - dbms/src/DataTypes/DataTypeNullable.h | 1 - dbms/src/DataTypes/DataTypeNumberBase.cpp | 7 - dbms/src/DataTypes/DataTypeNumberBase.h | 1 - dbms/src/DataTypes/DataTypeSet.h | 4 - dbms/src/DataTypes/DataTypeString.cpp | 5 - dbms/src/DataTypes/DataTypeString.h | 1 - dbms/src/DataTypes/DataTypeTuple.cpp | 8 - dbms/src/DataTypes/DataTypeTuple.h | 1 - dbms/src/DataTypes/IDataType.h | 4 - dbms/src/DataTypes/IDataTypeDummy.h | 5 - .../TableFunctions/TableFunctionRandom.cpp | 228 +++++++++++++++++- .../01072_random_table_function.sql | 46 +++- 28 files changed, 273 insertions(+), 98 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index f3b26497912..8111b1de2fe 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -304,10 +304,6 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const return ColumnAggregateFunction::create(function); } -MutableColumnPtr DataTypeAggregateFunction::createColumnWithRandomData(size_t) const -{ - 
throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} /// Create empty state Field DataTypeAggregateFunction::getDefault() const diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index e4c226b2917..9ae7c67a803 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -63,7 +63,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index 0500182c61a..e2c03805ea8 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -487,13 +487,6 @@ MutableColumnPtr DataTypeArray::createColumn() const } -MutableColumnPtr DataTypeArray::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - Field DataTypeArray::getDefault() const { return Array(); diff --git a/dbms/src/DataTypes/DataTypeArray.h b/dbms/src/DataTypes/DataTypeArray.h index ccf269bd357..1451f27dfbe 100644 --- a/dbms/src/DataTypes/DataTypeArray.h +++ b/dbms/src/DataTypes/DataTypeArray.h @@ -94,7 +94,6 @@ public: bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.cpp b/dbms/src/DataTypes/DataTypeDecimalBase.cpp index a0f2bd7bd82..7b9a391427c 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.cpp +++ 
b/dbms/src/DataTypes/DataTypeDecimalBase.cpp @@ -41,13 +41,6 @@ MutableColumnPtr DataTypeDecimalBase::createColumn() const return ColumnType::create(0, scale); } -template -MutableColumnPtr DataTypeDecimalBase::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - template void DataTypeDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const { diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h index d579b965412..11f7490e80a 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.h +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -83,7 +83,6 @@ public: Field getDefault() const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return true; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index fcaf9f2c8a3..5ca6296f43d 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -347,14 +347,6 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const } -template -MutableColumnPtr DataTypeEnum::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - /// Explicit instantiations. 
template class DataTypeEnum; template class DataTypeEnum; diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index a0408df0279..2cb677984df 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -111,7 +111,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override { return ColumnType::create(); } - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index a148d0b2d22..d30f1003ca0 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -268,12 +268,6 @@ MutableColumnPtr DataTypeFixedString::createColumn() const return ColumnFixedString::create(n); } -MutableColumnPtr DataTypeFixedString::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - Field DataTypeFixedString::getDefault() const { return String(); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 4f264d3ac86..6d1f1c4db83 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -70,7 +70,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeInterval.h b/dbms/src/DataTypes/DataTypeInterval.h index 111a2489d65..d66b329185d 100644 --- 
a/dbms/src/DataTypes/DataTypeInterval.h +++ b/dbms/src/DataTypes/DataTypeInterval.h @@ -12,7 +12,7 @@ namespace DB * Mostly the same as Int64. * But also tagged with interval kind. * - * Intended isage is for temporary elements in expressions, + * Intended usage is for temporary elements in expressions, * not for storing values in tables. */ class DataTypeInterval final : public DataTypeNumberBase diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 24dc3af48c9..5db32bd5380 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -934,12 +934,6 @@ MutableColumnPtr DataTypeLowCardinality::createColumn() const return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes)); } -MutableColumnPtr DataTypeLowCardinality::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - Field DataTypeLowCardinality::getDefault() const { return dictionary_type->getDefault(); diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 9b22acea7e3..f8c314909b8 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -68,7 +68,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNothing.cpp b/dbms/src/DataTypes/DataTypeNothing.cpp index ce4990748f9..79fbb002bff 100644 --- a/dbms/src/DataTypes/DataTypeNothing.cpp +++ b/dbms/src/DataTypes/DataTypeNothing.cpp @@ -14,14 +14,6 @@ MutableColumnPtr DataTypeNothing::createColumn() const return 
ColumnNothing::create(0); } - -MutableColumnPtr DataTypeNothing::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - void DataTypeNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { size_t size = column.size(); diff --git a/dbms/src/DataTypes/DataTypeNothing.h b/dbms/src/DataTypes/DataTypeNothing.h index 5fbe0acc0a9..e9421fb15e8 100644 --- a/dbms/src/DataTypes/DataTypeNothing.h +++ b/dbms/src/DataTypes/DataTypeNothing.h @@ -19,7 +19,6 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Nothing; } MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; /// These methods read and write zero bytes just to allow to figure out size of column. void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 6f31e66a1e5..397d5ba0a65 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -488,11 +488,6 @@ MutableColumnPtr DataTypeNullable::createColumn() const return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create()); } -MutableColumnPtr DataTypeNullable::createColumnWithRandomData(size_t limit) const -{ - return ColumnNullable::create(nested_data_type->createColumnWithRandomData(limit), DataTypeUInt8().createColumnWithRandomData(limit)); -} - Field DataTypeNullable::getDefault() const { return Null(); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 83a76ae0410..1766b399c2a 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -76,7 +76,6 @@ public: void 
deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 937967d431a..90356817730 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -239,13 +239,6 @@ MutableColumnPtr DataTypeNumberBase::createColumn() const return ColumnVector::create(); } -template -MutableColumnPtr DataTypeNumberBase::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - template bool DataTypeNumberBase::isValueRepresentedByInteger() const { diff --git a/dbms/src/DataTypes/DataTypeNumberBase.h b/dbms/src/DataTypes/DataTypeNumberBase.h index 5a3dda5fe15..fb752ad5329 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.h +++ b/dbms/src/DataTypes/DataTypeNumberBase.h @@ -45,7 +45,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return false; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index f468881cfe9..7ef0d931279 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,10 +21,6 @@ public: // Used for expressions analysis. 
MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } - MutableColumnPtr createColumnWithRandomData(size_t) const override - { - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 46478396a68..ef32fe33690 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -360,11 +360,6 @@ MutableColumnPtr DataTypeString::createColumn() const return ColumnString::create(); } -MutableColumnPtr DataTypeString::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} bool DataTypeString::equals(const IDataType & rhs) const { diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 4a2c6be42e1..28968eef3f1 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -54,7 +54,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 5c912b89f2d..4d60177aa4d 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -454,14 +454,6 @@ MutableColumnPtr DataTypeTuple::createColumn() const return ColumnTuple::create(std::move(tuple_columns)); } - -MutableColumnPtr DataTypeTuple::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method 
createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - Field DataTypeTuple::getDefault() const { return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); diff --git a/dbms/src/DataTypes/DataTypeTuple.h b/dbms/src/DataTypes/DataTypeTuple.h index a3a8fb2847e..06f0f62026e 100644 --- a/dbms/src/DataTypes/DataTypeTuple.h +++ b/dbms/src/DataTypes/DataTypeTuple.h @@ -81,7 +81,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & reader, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 04ad5896154..92d0c1057c5 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -287,10 +287,6 @@ public: */ virtual MutableColumnPtr createColumn() const = 0; - /** Create column for corresponding type and fill with random values. - */ - virtual MutableColumnPtr createColumnWithRandomData(size_t size) const = 0; - /** Create ColumnConst for corresponding type, with specified size and value. 
*/ ColumnPtr createColumnConst(size_t size, const Field & field) const; diff --git a/dbms/src/DataTypes/IDataTypeDummy.h b/dbms/src/DataTypes/IDataTypeDummy.h index e346689274f..f27359e5f74 100644 --- a/dbms/src/DataTypes/IDataTypeDummy.h +++ b/dbms/src/DataTypes/IDataTypeDummy.h @@ -42,11 +42,6 @@ public: throw Exception("Method createColumn() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - MutableColumnPtr createColumnWithRandomData(size_t) const override - { - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - Field getDefault() const override { throw Exception("Method getDefault() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index 3d4bb1d3247..1391ecdc74b 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -4,11 +4,17 @@ #include #include #include +#include +#include + #include #include #include +#include +#include + #include #include #include @@ -24,6 +30,226 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; + extern const int BAD_TYPE_OF_FIELD; + extern const int LOGICAL_ERROR; +} + +MutableColumnPtr createColumnWithRandomData(DataTypePtr type, UInt64 limit) +{ + TypeIndex idx = type->getTypeId(); + MutableColumnPtr column = type->createColumn(); + + switch (idx) + { + case TypeIndex::Nothing: + for (UInt64 i = 0; i < limit; ++i) + { + column->insertDefault(); + } + throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::UInt8: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt16: + { + pcg32 
generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt128: + throw Exception("Random Generator not implemented for type 'UInt128'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Int8: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int16: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int128: + throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Float32: + { + pcg32 generator(randomSeed()); + double d; + for (UInt64 i = 0; i < limit; ++i) + { + d = std::numeric_limits::max(); + column->insert( (d / pcg32::max()) * generator() ); + } + } + break; + case TypeIndex::Float64: + { + pcg64 generator(randomSeed()); + double d; + for (UInt64 i = 0; i < limit; ++i) + { + d = std::numeric_limits::max(); + column->insert( (d / pcg64::max()) * generator() ); + } + } + break; + case TypeIndex::Date: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + 
break; + case TypeIndex::DateTime: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::DateTime64: + { + UInt32 scale; + if (auto * ptype = typeid_cast(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + UInt32 fractional = static_cast(generator()) % intExp10(scale); + UInt32 whole = static_cast(generator()); + DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); + column->insert(DecimalField(dt, scale)); + } + } + break; + case TypeIndex::String: + throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::FixedString: + throw Exception("Random Generator not implemented for type 'FixedString'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Enum8: + throw Exception("Random Generator not implemented for type 'Enum8'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Enum16: + throw Exception("Random Generator not implemented for type 'Enum16'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Decimal32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Decimal64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Decimal128: + throw Exception("Random Generator not implemented for type 'Decimal128'.", ErrorCodes::NOT_IMPLEMENTED); +/* + { + UInt32 scale = 0; + if (auto * ptype = typeid_cast *>(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to Decimal128 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + + pcg128_once_insecure generator(randomSeed()); + 
for (UInt64 i = 0; i < limit; ++i) + { + column->insert(DecimalField(static_cast(generator()), scale)); + } + } + break; +*/ + case TypeIndex::UUID: + { + pcg128_once_insecure generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Array: + throw Exception("Random Generator not implemented for type 'Array'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Tuple: + throw Exception("Random Generator not implemented for type 'Tuple'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Set: + throw Exception("Random Generator not implemented for type 'Set'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Interval: + throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); + case TypeIndex::Nullable: + throw Exception("Random Generator not implemented for type 'Nullable'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Function: + throw Exception("Random Generator not implemented for type 'Function'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::AggregateFunction: + throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::LowCardinality: + throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED); + } + return column; } StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const @@ -55,7 +281,7 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) { - MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); + MutableColumnPtr column = createColumnWithRandomData(name_type.type, limit); res_block.insert({std::move(column), name_type.type, name_type.name}); } diff --git 
a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index 21f0925439d..c81d630d9b5 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -1 +1,45 @@ -SELECT * FROM generate('id int', 3) +SELECT +toTypeName(ui64), toTypeName(i64), +toTypeName(ui32), toTypeName(i32), +toTypeName(ui16), toTypeName(i16), +toTypeName(ui8), toTypeName(i8) +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1); +SELECT +ui64, i64, +ui32, i32, +ui16, i16, +ui8, i8 +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10); +SELECT +toTypeName(d), toTypeName(dt), toTypeName(dtm) +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 1); +SELECT +d, dt, dtm +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10) FORMAT JSONEachRow;; +SELECT +toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1); +SELECT +dt64, dts64, dtms64 +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10) FORMAT JSONEachRow; +SELECT +dt64, dts64, dtms64 +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10); +SELECT + toTypeName(f32), toTypeName(f64) +FROM generate('f32 Float32, f64 Float64', 1); +SELECT + f32, f64 +FROM generate('f32 Float32, f64 Float64', 10) FORMAT JSONEachRow; +SELECT + toTypeName(d32), toTypeName(d64) +FROM generate('d32 Decimal32(4), d64 Decimal64(8)', 1); +SELECT + d32, d64 +FROM generate('d32 Decimal32(4), d64 Decimal64(8)', 10) FORMAT JSONEachRow; +SELECT + toTypeName(i) +FROM generate('i Interval', 10); +SELECT + i +FROM generate('i Interval', 10) FORMAT JSONEachRow; From 
ad9cdd080cfcfe36c727ff77d34c5a46ad1a6e4b Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 13:59:26 +0300 Subject: [PATCH 041/215] Fix everything, add docs. --- dbms/src/DataTypes/DataTypeNumberBase.cpp | 2 +- .../TableFunctions/TableFunctionRandom.cpp | 440 ++++++++++++------ .../01072_random_table_function.reference | 207 ++++++++ .../01072_random_table_function.sql | 133 +++++- .../table_functions/generate.md | 38 ++ .../table_functions/generate.md | 1 + .../table_functions/generate.md | 37 ++ .../table_functions/generate.md | 1 + 8 files changed, 705 insertions(+), 154 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01072_random_table_function.reference create mode 100644 docs/en/query_language/table_functions/generate.md create mode 120000 docs/ja/query_language/table_functions/generate.md create mode 100644 docs/ru/query_language/table_functions/generate.md create mode 120000 docs/zh/query_language/table_functions/generate.md diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 90356817730..ce01269bc4d 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -257,7 +257,7 @@ template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; -template class DataTypeNumberBase; +template class DataTypeNumberBase; // used only in UUID template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index 1391ecdc74b..d70c8a73c63 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -4,8 +4,17 @@ #include #include #include +#include +#include #include #include +#include +#include +#include +#include +#include 
+#include +#include #include @@ -34,222 +43,355 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -MutableColumnPtr createColumnWithRandomData(DataTypePtr type, UInt64 limit) +void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, + UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed) { TypeIndex idx = type->getTypeId(); - MutableColumnPtr column = type->createColumn(); + if (!random_seed) + random_seed = randomSeed(); + (void) max_string_length; switch (idx) { case TypeIndex::Nothing: - for (UInt64 i = 0; i < limit; ++i) - { - column->insertDefault(); - } throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::UInt8: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt16: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - 
column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt128: - throw Exception("Random Generator not implemented for type 'UInt128'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("There is no DataType 'UInt128' support.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::Int8: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int16: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int128: - throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("There is no DataType 'Int128' support.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::Float32: + { + auto & data = typeid_cast 
&>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + double d = 1.0; + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - double d; - for (UInt64 i = 0; i < limit; ++i) - { - d = std::numeric_limits::max(); - column->insert( (d / pcg32::max()) * generator() ); - } + d = std::numeric_limits::max(); + data[i] = (d / pcg32::max()) * generator(); } break; + } case TypeIndex::Float64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + double d = 1.0; + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - double d; - for (UInt64 i = 0; i < limit; ++i) - { - d = std::numeric_limits::max(); - column->insert( (d / pcg64::max()) * generator() ); - } + d = std::numeric_limits::max(); + data[i] = (d / pcg64::max()) * generator(); } break; + } case TypeIndex::Date: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::DateTime: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::DateTime64: + { + UInt32 scale; + if (auto * ptype = typeid_cast(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - UInt32 scale; - if (auto * ptype = typeid_cast(type.get())) - scale = 
ptype->getScale(); - else - throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - UInt32 fractional = static_cast(generator()) % intExp10(scale); - UInt32 whole = static_cast(generator()); - DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); - column->insert(DecimalField(dt, scale)); - } + UInt32 fractional = static_cast(generator()) % intExp10(scale); + UInt32 whole = static_cast(generator()); + DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); + data[i] = dt; } break; + } case TypeIndex::String: - throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::FixedString: - throw Exception("Random Generator not implemented for type 'FixedString'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Enum8: - throw Exception("Random Generator not implemented for type 'Enum8'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Enum16: - throw Exception("Random Generator not implemented for type 'Enum16'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Decimal32: - { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } - } - break; - case TypeIndex::Decimal64: - { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } - } - break; - case TypeIndex::Decimal128: - throw Exception("Random Generator not implemented for type 'Decimal128'.", ErrorCodes::NOT_IMPLEMENTED); -/* - { - UInt32 scale = 0; - if (auto * ptype = typeid_cast *>(type.get())) - scale = ptype->getScale(); - else - throw Exception("Static cast to Decimal128 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + { + auto & column_string = typeid_cast(column); + auto & offsets = column_string.getOffsets(); + auto & chars = 
column_string.getChars(); - pcg128_once_insecure generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(DecimalField(static_cast(generator()), scale)); - } - } - break; -*/ - case TypeIndex::UUID: + UInt64 offset = 0; { - pcg128_once_insecure generator(randomSeed()); + pcg32 generator(random_seed); + offsets.resize(limit); for (UInt64 i = 0; i < limit; ++i) { - column->insert(static_cast(generator())); + offset += 1 + static_cast(generator()) % max_string_length; + offsets[i] = offset - 1; + } + chars.resize(offset); + for (UInt64 i = 0; i < offset; ++i) { + chars[i] = 32 + generator() % 95; + } + // add terminating zero char + for (auto & i : offsets) + { + chars[i] = 0; } } break; + } + case TypeIndex::FixedString: + { + auto & column_string = typeid_cast(column); + size_t len = column_string.sizeOfValueIfFixed(); + auto & chars = column_string.getChars(); + + UInt64 num_chars = static_cast(len) * limit; + { + pcg32 generator(random_seed); + chars.resize(num_chars); + for (UInt64 i = 0; i < num_chars; ++i) { + chars[i] = static_cast(generator()); + } + } + break; + } + case TypeIndex::Enum8: + { + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + + UInt8 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(generator()) % size; + data[i] = values[off].second; + } + break; + } + case TypeIndex::Enum16: + { + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + + UInt16 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(generator()) % size; + data[i] = values[off].second; + } + break; + } + case TypeIndex::Decimal32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 
i = 0; i < limit; ++i) + { + data[i] = static_cast(generator()); + } + break; + } + case TypeIndex::Decimal64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) + { + data[i] = static_cast(generator()); + } + break; + } + case TypeIndex::Decimal128: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) + { + Int128 x = static_cast(generator()) << 64 | static_cast(generator()); + data[i] = x; + } + } + break; + case TypeIndex::UUID: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { + auto x = UInt128(generator(), generator()); + data[i] = x; + } + } + break; case TypeIndex::Array: - throw Exception("Random Generator not implemented for type 'Array'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto & column_array = typeid_cast(column); + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto & offsets = column_array.getOffsets(); + IColumn & data = column_array.getData(); + + UInt64 offset = 0; + { + pcg32 generator(random_seed); + offsets.resize(limit); + for (UInt64 i = 0; i < limit; ++i) { + offset += static_cast(generator()) % max_array_length; + offsets[i] = offset; + } + } + fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, random_seed); + break; + } case TypeIndex::Tuple: - throw Exception("Random Generator not implemented for type 'Tuple'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto &column_tuple = typeid_cast(column); + auto elements = typeid_cast(type.get())->getElements(); + + for (size_t i = 0; i < column_tuple.tupleSize(); ++i) + { + fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, random_seed); + } + break; + } case TypeIndex::Set: - throw Exception("Random Generator not 
implemented for type 'Set'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Type 'Set' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::Interval: throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::Nullable: - throw Exception("Random Generator not implemented for type 'Nullable'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto & column_nullable = typeid_cast(column); + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto & null_map = column_nullable.getNullMapData(); + IColumn & nested_column = column_nullable.getNestedColumn(); + + fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, random_seed); + + pcg32 generator(random_seed); + null_map.resize(limit); + for (UInt64 i = 0; i < limit; ++i) { + null_map[i] = generator() < 1024; + } + break; + } case TypeIndex::Function: - throw Exception("Random Generator not implemented for type 'Function'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Type 'Funclion' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::AggregateFunction: throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::LowCardinality: throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED); } - return column; } StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const @@ -261,30 +403,48 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C ASTs & args = args_func.at(0)->children; - if (args.size() > 2) - throw Exception("Table function '" + getName() + "' requires one or two arguments: structure (and limit).", + if (args.size() > 5) + throw Exception("Table function '" + getName() + "' requires at most five arguments: "\ + " structure, limit, 
max_array_length, max_string_length, random_seed.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// Parsing first argument as table structure and creating a sample block std::string structure = args[0]->as().value.safeGet(); UInt64 limit = 1; + UInt64 max_array_length = 10; + UInt64 max_string_length = 10; + UInt64 random_seed = 0; // zero for random + /// Parsing second argument if present - if (args.size() == 2) + if (args.size() >= 2) limit = args[1]->as().value.safeGet(); if (!limit) throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); + if (args.size() >= 3) + max_array_length = args[1]->as().value.safeGet(); + + if (args.size() >= 4) + max_string_length = args[1]->as().value.safeGet(); + + if (args.size() == 5) + random_seed = args[1]->as().value.safeGet(); + ColumnsDescription columns = parseColumnsListFromString(structure, context); Block res_block; for (const auto & name_type : columns.getOrdinary()) { - MutableColumnPtr column = createColumnWithRandomData(name_type.type, limit); + MutableColumnPtr column = name_type.type->createColumn(); res_block.insert({std::move(column), name_type.type, name_type.name}); } + for (auto & ctn : res_block.getColumnsWithTypeAndName()) + { + fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, limit, max_array_length, max_string_length, random_seed); + } auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); return res; diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference new file mode 100644 index 00000000000..93ea1861756 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -0,0 +1,207 @@ +Enum8(\'hello\' = 1, \'world\' = 5) +world +hello +world +hello +hello +hello +world +hello +hello +hello +Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) 
+['world','hello','world','hello','hello','hello','world','hello','hello'] +['hello','world','world','hello','world','world'] +['hello','world','hello','hello','world','world','world'] +['world','world','world','world','world','world','hello','hello'] +['world','hello'] +['hello','hello'] +['world'] +['hello','hello'] +['hello','hello'] +['hello','world','hello','hello','world','world','world','world'] +Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) +o +w +w +w +h +w +h +h +w +o +UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 +2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 +9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 +4555697903102013946 4555697903102013946 275100647 275100647 46055 -19481 231 -25 +5784362079052877875 5784362079052877875 1033685688 1033685688 51896 -13640 184 -72 +11035971995277520997 -7410772078432030619 180895192 180895192 15832 15832 216 -40 +7901646768096461004 7901646768096461004 135557292 135557292 28844 28844 172 -84 +6733841386518201279 6733841386518201279 716914271 716914271 15967 15967 95 95 +7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 +2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 +3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +Date DateTime DateTime(\'Europe/Moscow\') +2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 +2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 +2096-02-04 1978-09-20 03:50:47 1978-09-20 03:50:47 +2106-02-07 2002-10-04 02:54:48 2002-10-04 02:54:48 +2013-05-07 1975-09-25 19:39:52 1975-09-25 19:39:52 +2048-12-21 1974-04-19 01:48:12 1974-04-19 01:48:12 +2013-09-19 1992-09-19 18:51:11 1992-09-19 18:51:11 +1991-02-02 2002-01-28 12:47:02 2002-01-28 12:47:02 +2056-04-25 2039-04-06 20:11:02 2039-04-06 20:11:02 +2049-06-05 2053-11-20 07:10:58 2053-11-20 07:10:58 +DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') +2007-02-20 
10:43:46.989 2007-02-20 10:43:46.817989 2007-02-20 10:43:46.817989 +2002-10-04 02:54:48.647 2002-10-04 02:54:48.100647 2002-10-04 02:54:48.100647 +1974-04-19 01:48:12.192 1974-04-19 01:48:12.895192 1974-04-19 01:48:12.895192 +2002-01-28 12:47:02.271 2002-01-28 12:47:02.914271 2002-01-28 12:47:02.914271 +2053-11-20 07:10:58.662 2053-11-20 07:10:58.722662 2053-11-20 07:10:58.722662 +1986-04-08 19:07:15.849 1986-04-08 19:07:15.510849 1986-04-08 19:07:15.510849 +2081-03-06 04:00:55.914 2081-03-06 04:00:55.448914 2081-03-06 04:00:55.448914 +1979-01-20 20:39:20.939 1979-01-20 20:39:20.162939 1979-01-20 20:39:20.162939 +2063-07-18 01:46:10.215 2063-07-18 01:46:10.908215 2063-07-18 01:46:10.908215 +1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 +Float32 Float64 +9.783235e37 2.1973467205491123e307 +9.285203e37 8.887754501811354e307 +2.1795718e37 4.4396706606805647e307 +8.1897013e37 5.637042481600483e307 +1.4331993e37 1.07549012514996e308 +1.0739954e37 7.700402896226395e307 +5.67998e37 6.562339881458101e307 +8.019563e37 7.539520705557441e307 +1.7317079e38 2.143274805821858e307 +2.0973474e38 2.9425818885529257e307 +Decimal32(4) Decimal64(8) Decimal64(8) +123481.7989 22547726199.26532955 4159321346419233104838.6879832895010840 +117195.7426 91200288583.97505560 8403779329565810688767.7049545291714611 +27510.0647 45556979031.02013946 -13670461591942827725055.0250490776469300 +103368.5688 57843620790.52877875 12421744869005473959544.2499747955622051 +18089.5192 -74107720784.32030619 4056969511333950153663.4915186231430947 +13555.7292 79016467680.96461004 -8819413736166121578589.4583420666183888 +71691.4271 67338413865.18201279 13058329479868658041313.8432372419860363 +101221.1222 77365600500.27905187 -4693380431928321782727.0243506636623202 +-210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 +-164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 +UUID +1f4a8fc0-63ff-735b-7e90-d9ed3e183818 
+3f39171b-1263-31fa-5046-2ea9fe2fd033 +9927a60f-01ac-f065-6da8-49def100c0cc +5d736910-493d-c3bf-6b5d-c8601d6440a3 +1e857066-961d-be0e-29e7-5c9efd534f23 +bda66d4f-737b-3622-b60f-aa27fe38ff30 +623d6d82-4422-2885-297f-7b2fec54178b +dcb0e0ca-3a43-5f2e-556e-7945df65729e +678f2360-36ac-d439-8d6d-f92295887e50 +9780b53e-dc0f-4a21-bdb3-9798af1913ad +Tuple(Int32, Int64) +(1234817989,2254772619926532955) +(1171957426,9120028858397505560) +(275100647,4555697903102013946) +(1033685688,5784362079052877875) +(180895192,-7410772078432030619) +(135557292,7901646768096461004) +(716914271,6733841386518201279) +(1012211222,7736560050027905187) +(-2109244634,2199287578947862030) +(-1647742638,3019483913099890467) +Array(Int8) +[-59,-78,-25,-72,-40,-84,95,22,38] +[82,65,35,-110,-57,-69] +[72,119,-78,-58,13,39,-71] +[81,107,-11,-63,-59,69,-80,-122] +[87,-76] +[22,-84] +[-45] +[-40,84] +[-104,-86] +[-36,123,44,60,5,25,-5,-127] +Array(Nullable(Int32)) +[1234817989,1171957426,275100647,1033685688,180895192,135557292,716914271,1012211222,-2109244634] +[-1647742638,319510849,513356835,-1966518382,-786518841,269162939] +[285701960,1943908215,-1343029326,1474183110,846934541,1007818023,-1664171079] +[195050577,371018347,734173429,2001591233,-1812297275,1172704837,-728923984,774864518] +[-462583209,-1520633676] +[-638906858,1986832300] +[378774483] +[-1399152424,-953863084] +[733724312,-23652950] +[371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] +Array(Nullable(UUID)) +['1f4a8fc0-63ff-735b-7e90-d9ed3e183818','3f39171b-1263-31fa-5046-2ea9fe2fd033','9927a60f-01ac-f065-6da8-49def100c0cc','5d736910-493d-c3bf-6b5d-c8601d6440a3','1e857066-961d-be0e-29e7-5c9efd534f23','bda66d4f-737b-3622-b60f-aa27fe38ff30','623d6d82-4422-2885-297f-7b2fec54178b','dcb0e0ca-3a43-5f2e-556e-7945df65729e','678f2360-36ac-d439-8d6d-f92295887e50'] 
+['9780b53e-dc0f-4a21-bdb3-9798af1913ad','c79810de-3635-d333-5ca1-7a81ab302b25','1c756bca-4438-3f17-a766-c8bcbe3ba400','d9072738-ac93-7ed6-167b-3c3c66d35a18','b1e8dec2-de29-3c9f-aaf2-f78fd92df3ce','9cd25f9f-3c0d-f43d-5a46-0194f0be04dd'] +['10a4718d-ab8c-49c6-c785-66ccf112f7d5','02ac2bf5-5634-a5a8-9a18-05ce8d1fb583','8037a13d-2004-08f2-f831-fa2387f5c29a','a99c4373-1121-2691-ecbb-216adbd748c7','ef0986ff-5031-0353-2f21-1de3ea53af08','778064a7-653b-ef7b-c77b-4d769b12b917','a1607e6f-691a-0ff0-b0b3-e454dae7bef7'] +['71c1b47a-c0eb-42b5-eecd-18dc585284fd','72bbf272-9ec5-09ec-f339-b5dac55c037b','26e5bce5-43f7-59b0-84c6-ef509f4c45eb','305fcbff-c366-2033-a8c5-d648f236e754','3a0d329f-f897-84e9-9e87-9501a713e63d','54bda20c-d5cd-a08a-c078-3c4fd81f4f55','43f549d1-3e5b-d5bf-ed32-b4850648bdc8','7eb6ac4f-06e0-ff48-6330-3c7afa5f2644'] +['17b9a4a5-fef8-a3f9-5af4-3b6e67ca62c9','3f524d8e-320d-00dc-c210-e199206550db'] +['005c592e-5081-9f3d-1fcb-5a9e82f39f97','29cf228d-b325-4a34-3eff-e80494a79260'] +['6c08b54b-8cf8-b96d-f087-8b54f5e72d0e'] +['7122e162-ab8b-a84a-6b71-c0846cf0204d','51c1de1a-24c7-18d6-39ed-e9023205610c'] +['f09d6779-1106-d667-e7c9-9a0cad544afe','62060fec-ee13-7c66-5da4-02c8f4d50dc9'] +['df1d0d54-d639-9c9b-2070-622fc9d82203','f23ef5b9-3797-9b0e-b8ac-67ea31b99c3e','e48afe73-9e22-7439-afed-d53b6ea204f4','d7f1ab47-4928-7623-283e-fb3f16aebeba','ea270407-d32f-a407-add2-3ae2d1113ccb','c43e9fff-2980-a1d1-f1bb-ff94d3cffbc2','a0cd54e6-0a2d-07ec-88ad-4f5d29c15b06','5e93413f-2eb9-5363-17ab-e2215b8b19e0'] +Tuple(Int32, Array(Int64)) +(1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) +(1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) 
+(275100647,[6155991081669718686,7462222003717329977,-8255668614967296432,-7529819295378879967,-4777308097681484883,-4064480117123591373,6674750820081216293]) +(1033685688,[2050663721809231639,-6384194708780112896,-2808232718275215658,1619954721090656792,-5627002805867168609,-6128563945701772338,-7146544521171569603,6504888450989032669]) +(180895192,[1199208254069819846,-4069733657855461419]) +(135557292,[192577216783361448,-7343112807738526333]) +(716914271,[-9207713629233477390]) +(1012211222,[-562393447932771686,-6225026423445182831]) +(-2109244634,[-1388479317275096889,-1222297392734207149]) +(-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) +FixedString(4) +Ų +ج_ +&RA# +ǻH +w\r +\'Qk +E +W + +T +String +String +String +String +String +String +String +String +String +String +)/VC)%f9 +\0ih|;B +\0J"Z,kd +\0m"m]$35 +\00 +\0( +\0 +\0g +\0> +\0XjbW:s< +Nullable(String) +)/VC)%f9 +\0ih|;B +\0J"Z,kd +\0m"m]$35 +\00 +\0( +\0 +\0g +\0> +\0XjbW:s< +Array(String) +['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] +['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] +['','\0\\p]|]','\05','\0k$C/pnA'] +['\0ryz{*p',''] +['\07`mjt*G',''] +['\0~g'] +['\0k','\0 '] +['\0F','\0&h diff --git a/docs/ja/query_language/table_functions/generate.md b/docs/ja/query_language/table_functions/generate.md new file mode 120000 index 00000000000..de0b0a41754 --- /dev/null +++ b/docs/ja/query_language/table_functions/generate.md @@ -0,0 +1 @@ +en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/ru/query_language/table_functions/generate.md b/docs/ru/query_language/table_functions/generate.md new file mode 100644 index 00000000000..11d7f7073a9 --- /dev/null +++ b/docs/ru/query_language/table_functions/generate.md @@ -0,0 +1,37 @@ +# generate + +Генерирует случайные данные с заданной 
схемой. +Позволяет заполнять тестовые таблицы данными. +Поддерживает все типы данных, которые могут храниться в таблице, за исключением LowCardinality, AggregateFunction. + +```sql +generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); +``` + +**Входные параметры** +- `name` — название соответствующего столбца. +- `TypeName` — тип соответствующего столбца. +- `limit` — количество строк для генерации. +- `max_array_length` — максимальная длина массива для всех сгенерированных массивов. По умолчанию `10`. +- `max_string_length` — максимальная длина строки для всех генерируемых строк. По умолчанию `10`. +- `random_seed` — укажите состояние генератора случайных чисел вручную, чтобы получить стабильные результаты. По умолчанию `0` - генератор инициализируется случайным состоянием. + +**Возвращаемое значение** + +Объект таблицы с запрошенной схемой. + +## Пример + + +```sql +SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); +``` +```text +┌─a────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ +│ [77] │ -124167.6723 │ ('2061-04-17 21:59:44.573','3f72f405-ec3e-13c8-44ca-66ef335f7835') │ +│ [32,110] │ -141397.7312 │ ('1979-02-09 03:43:48.526','982486d1-5a5d-a308-e525-7bd8b80ffa73') │ +│ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │ +└──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/generate/) diff --git a/docs/zh/query_language/table_functions/generate.md b/docs/zh/query_language/table_functions/generate.md new file mode 120000 index 00000000000..de0b0a41754 --- /dev/null +++ b/docs/zh/query_language/table_functions/generate.md @@ -0,0 +1 @@ +en/query_language/table_functions/generate.md \ No newline at end of file From 
683a639ce9280a42c1d15511bc62f2d7542625f9 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 15:25:09 +0300 Subject: [PATCH 042/215] doc fixes --- docs/en/query_language/table_functions/generate.md | 2 +- docs/ru/query_language/table_functions/generate.md | 2 +- docs/toc_en.yml | 1 + docs/toc_fa.yml | 1 + docs/toc_ja.yml | 3 ++- docs/toc_ru.yml | 1 + docs/toc_zh.yml | 1 + 7 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/en/query_language/table_functions/generate.md b/docs/en/query_language/table_functions/generate.md index c04ebff8a1a..ed9e2150b03 100644 --- a/docs/en/query_language/table_functions/generate.md +++ b/docs/en/query_language/table_functions/generate.md @@ -2,7 +2,7 @@ Generates random data with given schema. Allows to populate test tables with data. -Supports all data types that can be stored in table except LowCardinality, AggregateFunction. +Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`. ```sql generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); diff --git a/docs/ru/query_language/table_functions/generate.md b/docs/ru/query_language/table_functions/generate.md index 11d7f7073a9..53544d16e7d 100644 --- a/docs/ru/query_language/table_functions/generate.md +++ b/docs/ru/query_language/table_functions/generate.md @@ -2,7 +2,7 @@ Генерирует случайные данные с заданной схемой. Позволяет заполнять тестовые таблицы данными. -Поддерживает все типы данных, которые могут храниться в таблице, за исключением LowCardinality, AggregateFunction. +Поддерживает все типы данных, которые могут храниться в таблице, за исключением `LowCardinality` и `AggregateFunction`. 
```sql generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 8558216b15b..76d115045e7 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -142,6 +142,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index bd1e84d590e..280d5a6f53a 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -168,6 +168,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index f47bc065890..6661300a97e 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -140,7 +140,8 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'Dictionaries': + - 'generate': 'query_language/table_functions/generate.md' +- 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': - 'General Description': 'query_language/dicts/external_dicts.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 5999ac74b56..06f196fd2f5 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -141,6 +141,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Словари': - 
'Введение': 'query_language/dicts/index.md' - 'Внешние словари': diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index e85c6b50f27..c7ec13a1943 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -167,6 +167,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - '字典': - '介绍': 'query_language/dicts/index.md' - '外部字典': From f8e66e88a43f3f8b0bab4f807b46105ea0e32913 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 16:29:08 +0300 Subject: [PATCH 043/215] style fixes --- .../src/TableFunctions/TableFunctionRandom.cpp | 18 ++++++++++++------ .../query_language/table_functions/generate.md | 1 + .../query_language/table_functions/generate.md | 2 +- .../query_language/table_functions/generate.md | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) create mode 120000 docs/fa/query_language/table_functions/generate.md diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index d70c8a73c63..fff1ed83539 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -224,12 +224,14 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { offset += 1 + static_cast(generator()) % max_string_length; offsets[i] = offset - 1; } chars.resize(offset); - for (UInt64 i = 0; i < offset; ++i) { + for (UInt64 i = 0; i < offset; ++i) + { chars[i] = 32 + generator() % 95; } // add terminating zero char @@ -250,7 +252,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); chars.resize(num_chars); - for (UInt64 i = 0; i < num_chars; 
++i) { + for (UInt64 i = 0; i < num_chars; ++i) + { chars[i] = static_cast(generator()); } } @@ -327,7 +330,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, auto & data = typeid_cast &>(column).getData(); data.resize(limit); pcg64 generator(random_seed); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { auto x = UInt128(generator(), generator()); data[i] = x; } @@ -345,7 +349,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { offset += static_cast(generator()) % max_array_length; offsets[i] = offset; } @@ -380,7 +385,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, pcg32 generator(random_seed); null_map.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { null_map[i] = generator() < 1024; } break; diff --git a/docs/fa/query_language/table_functions/generate.md b/docs/fa/query_language/table_functions/generate.md new file mode 120000 index 00000000000..141c05da1e3 --- /dev/null +++ b/docs/fa/query_language/table_functions/generate.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/generate.md b/docs/ja/query_language/table_functions/generate.md index de0b0a41754..141c05da1e3 120000 --- a/docs/ja/query_language/table_functions/generate.md +++ b/docs/ja/query_language/table_functions/generate.md @@ -1 +1 @@ -en/query_language/table_functions/generate.md \ No newline at end of file +../../../en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/zh/query_language/table_functions/generate.md b/docs/zh/query_language/table_functions/generate.md index de0b0a41754..141c05da1e3 120000 --- 
a/docs/zh/query_language/table_functions/generate.md +++ b/docs/zh/query_language/table_functions/generate.md @@ -1 +1 @@ -en/query_language/table_functions/generate.md \ No newline at end of file +../../../en/query_language/table_functions/generate.md \ No newline at end of file From 513c2e8f1b010c3aafcb08bd5c67c77ebd76845c Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Feb 2020 15:34:11 +0300 Subject: [PATCH 044/215] recanonize linux build --- .../01072_random_table_function.reference | 73 ++++++++++++++----- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference index 93ea1861756..3906b417524 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -9,6 +9,7 @@ world hello hello hello +- Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) ['world','hello','world','hello','hello','hello','world','hello','hello'] ['hello','world','world','hello','world','world'] @@ -20,6 +21,7 @@ Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) ['hello','hello'] ['hello','hello'] ['hello','world','hello','hello','world','world','world','world'] +- Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) o w @@ -31,6 +33,7 @@ h h w o +- UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 @@ -42,6 +45,7 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +- Date DateTime 
DateTime(\'Europe/Moscow\') 2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 @@ -53,6 +57,7 @@ Date DateTime DateTime(\'Europe/Moscow\') 1991-02-02 2002-01-28 12:47:02 2002-01-28 12:47:02 2056-04-25 2039-04-06 20:11:02 2039-04-06 20:11:02 2049-06-05 2053-11-20 07:10:58 2053-11-20 07:10:58 +- DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 2007-02-20 10:43:46.989 2007-02-20 10:43:46.817989 2007-02-20 10:43:46.817989 2002-10-04 02:54:48.647 2002-10-04 02:54:48.100647 2002-10-04 02:54:48.100647 @@ -64,6 +69,7 @@ DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 1979-01-20 20:39:20.939 1979-01-20 20:39:20.162939 1979-01-20 20:39:20.162939 2063-07-18 01:46:10.215 2063-07-18 01:46:10.908215 2063-07-18 01:46:10.908215 1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 +- Float32 Float64 9.783235e37 2.1973467205491123e307 9.285203e37 8.887754501811354e307 @@ -75,6 +81,7 @@ Float32 Float64 8.019563e37 7.539520705557441e307 1.7317079e38 2.143274805821858e307 2.0973474e38 2.9425818885529257e307 +- Decimal32(4) Decimal64(8) Decimal64(8) 123481.7989 22547726199.26532955 4159321346419233104838.6879832895010840 117195.7426 91200288583.97505560 8403779329565810688767.7049545291714611 @@ -86,17 +93,19 @@ Decimal32(4) Decimal64(8) Decimal64(8) 101221.1222 77365600500.27905187 -4693380431928321782727.0243506636623202 -210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 -164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 +- UUID -1f4a8fc0-63ff-735b-7e90-d9ed3e183818 -3f39171b-1263-31fa-5046-2ea9fe2fd033 -9927a60f-01ac-f065-6da8-49def100c0cc -5d736910-493d-c3bf-6b5d-c8601d6440a3 -1e857066-961d-be0e-29e7-5c9efd534f23 -bda66d4f-737b-3622-b60f-aa27fe38ff30 -623d6d82-4422-2885-297f-7b2fec54178b -dcb0e0ca-3a43-5f2e-556e-7945df65729e -678f2360-36ac-d439-8d6d-f92295887e50 -9780b53e-dc0f-4a21-bdb3-9798af1913ad 
+7e90d9ed-3e18-3818-1f4a-8fc063ff735b +50462ea9-fe2f-d033-3f39-171b126331fa +6da849de-f100-c0cc-9927-a60f01acf065 +6b5dc860-1d64-40a3-5d73-6910493dc3bf +29e75c9e-fd53-4f23-1e85-7066961dbe0e +b60faa27-fe38-ff30-bda6-6d4f737b3622 +297f7b2f-ec54-178b-623d-6d8244222885 +556e7945-df65-729e-dcb0-e0ca3a435f2e +8d6df922-9588-7e50-678f-236036acd439 +bdb39798-af19-13ad-9780-b53edc0f4a21 +- Tuple(Int32, Int64) (1234817989,2254772619926532955) (1171957426,9120028858397505560) @@ -108,6 +117,7 @@ Tuple(Int32, Int64) (1012211222,7736560050027905187) (-2109244634,2199287578947862030) (-1647742638,3019483913099890467) +- Array(Int8) [-59,-78,-25,-72,-40,-84,95,22,38] [82,65,35,-110,-57,-69] @@ -119,6 +129,7 @@ Array(Int8) [-40,84] [-104,-86] [-36,123,44,60,5,25,-5,-127] +- Array(Nullable(Int32)) [1234817989,1171957426,275100647,1033685688,180895192,135557292,716914271,1012211222,-2109244634] [-1647742638,319510849,513356835,-1966518382,-786518841,269162939] @@ -130,17 +141,19 @@ Array(Nullable(Int32)) [-1399152424,-953863084] [733724312,-23652950] [371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] +- Array(Nullable(UUID)) -['1f4a8fc0-63ff-735b-7e90-d9ed3e183818','3f39171b-1263-31fa-5046-2ea9fe2fd033','9927a60f-01ac-f065-6da8-49def100c0cc','5d736910-493d-c3bf-6b5d-c8601d6440a3','1e857066-961d-be0e-29e7-5c9efd534f23','bda66d4f-737b-3622-b60f-aa27fe38ff30','623d6d82-4422-2885-297f-7b2fec54178b','dcb0e0ca-3a43-5f2e-556e-7945df65729e','678f2360-36ac-d439-8d6d-f92295887e50'] -['9780b53e-dc0f-4a21-bdb3-9798af1913ad','c79810de-3635-d333-5ca1-7a81ab302b25','1c756bca-4438-3f17-a766-c8bcbe3ba400','d9072738-ac93-7ed6-167b-3c3c66d35a18','b1e8dec2-de29-3c9f-aaf2-f78fd92df3ce','9cd25f9f-3c0d-f43d-5a46-0194f0be04dd'] 
-['10a4718d-ab8c-49c6-c785-66ccf112f7d5','02ac2bf5-5634-a5a8-9a18-05ce8d1fb583','8037a13d-2004-08f2-f831-fa2387f5c29a','a99c4373-1121-2691-ecbb-216adbd748c7','ef0986ff-5031-0353-2f21-1de3ea53af08','778064a7-653b-ef7b-c77b-4d769b12b917','a1607e6f-691a-0ff0-b0b3-e454dae7bef7'] -['71c1b47a-c0eb-42b5-eecd-18dc585284fd','72bbf272-9ec5-09ec-f339-b5dac55c037b','26e5bce5-43f7-59b0-84c6-ef509f4c45eb','305fcbff-c366-2033-a8c5-d648f236e754','3a0d329f-f897-84e9-9e87-9501a713e63d','54bda20c-d5cd-a08a-c078-3c4fd81f4f55','43f549d1-3e5b-d5bf-ed32-b4850648bdc8','7eb6ac4f-06e0-ff48-6330-3c7afa5f2644'] -['17b9a4a5-fef8-a3f9-5af4-3b6e67ca62c9','3f524d8e-320d-00dc-c210-e199206550db'] -['005c592e-5081-9f3d-1fcb-5a9e82f39f97','29cf228d-b325-4a34-3eff-e80494a79260'] -['6c08b54b-8cf8-b96d-f087-8b54f5e72d0e'] -['7122e162-ab8b-a84a-6b71-c0846cf0204d','51c1de1a-24c7-18d6-39ed-e9023205610c'] -['f09d6779-1106-d667-e7c9-9a0cad544afe','62060fec-ee13-7c66-5da4-02c8f4d50dc9'] -['df1d0d54-d639-9c9b-2070-622fc9d82203','f23ef5b9-3797-9b0e-b8ac-67ea31b99c3e','e48afe73-9e22-7439-afed-d53b6ea204f4','d7f1ab47-4928-7623-283e-fb3f16aebeba','ea270407-d32f-a407-add2-3ae2d1113ccb','c43e9fff-2980-a1d1-f1bb-ff94d3cffbc2','a0cd54e6-0a2d-07ec-88ad-4f5d29c15b06','5e93413f-2eb9-5363-17ab-e2215b8b19e0'] +['7e90d9ed-3e18-3818-1f4a-8fc063ff735b','50462ea9-fe2f-d033-3f39-171b126331fa','6da849de-f100-c0cc-9927-a60f01acf065','6b5dc860-1d64-40a3-5d73-6910493dc3bf','29e75c9e-fd53-4f23-1e85-7066961dbe0e','b60faa27-fe38-ff30-bda6-6d4f737b3622','297f7b2f-ec54-178b-623d-6d8244222885','556e7945-df65-729e-dcb0-e0ca3a435f2e','8d6df922-9588-7e50-678f-236036acd439'] +['bdb39798-af19-13ad-9780-b53edc0f4a21','5ca17a81-ab30-2b25-c798-10de3635d333','a766c8bc-be3b-a400-1c75-6bca44383f17','167b3c3c-66d3-5a18-d907-2738ac937ed6','aaf2f78f-d92d-f3ce-b1e8-dec2de293c9f','5a460194-f0be-04dd-9cd2-5f9f3c0df43d'] 
+['c78566cc-f112-f7d5-10a4-718dab8c49c6','9a1805ce-8d1f-b583-02ac-2bf55634a5a8','f831fa23-87f5-c29a-8037-a13d200408f2','ecbb216a-dbd7-48c7-a99c-437311212691','2f211de3-ea53-af08-ef09-86ff50310353','c77b4d76-9b12-b917-7780-64a7653bef7b','b0b3e454-dae7-bef7-a160-7e6f691a0ff0'] +['eecd18dc-5852-84fd-71c1-b47ac0eb42b5','f339b5da-c55c-037b-72bb-f2729ec509ec','84c6ef50-9f4c-45eb-26e5-bce543f759b0','a8c5d648-f236-e754-305f-cbffc3662033','9e879501-a713-e63d-3a0d-329ff89784e9','c0783c4f-d81f-4f55-54bd-a20cd5cda08a','ed32b485-0648-bdc8-43f5-49d13e5bd5bf','63303c7a-fa5f-2644-7eb6-ac4f06e0ff48'] +['5af43b6e-67ca-62c9-17b9-a4a5fef8a3f9','c210e199-2065-50db-3f52-4d8e320d00dc'] +['1fcb5a9e-82f3-9f97-005c-592e50819f3d','3effe804-94a7-9260-29cf-228db3254a34'] +['f0878b54-f5e7-2d0e-6c08-b54b8cf8b96d'] +['6b71c084-6cf0-204d-7122-e162ab8ba84a','39ede902-3205-610c-51c1-de1a24c718d6'] +['e7c99a0c-ad54-4afe-f09d-67791106d667','5da402c8-f4d5-0dc9-6206-0fecee137c66'] +['2070622f-c9d8-2203-df1d-0d54d6399c9b','b8ac67ea-31b9-9c3e-f23e-f5b937979b0e','afedd53b-6ea2-04f4-e48a-fe739e227439','283efb3f-16ae-beba-d7f1-ab4749287623','add23ae2-d111-3ccb-ea27-0407d32fa407','f1bbff94-d3cf-fbc2-c43e-9fff2980a1d1','88ad4f5d-29c1-5b06-a0cd-54e60a2d07ec','17abe221-5b8b-19e0-5e93-413f2eb95363'] +- Tuple(Int32, Array(Int64)) (1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) (1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) @@ -152,6 +165,7 @@ Tuple(Int32, Array(Int64)) (1012211222,[-562393447932771686,-6225026423445182831]) (-2109244634,[-1388479317275096889,-1222297392734207149]) (-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) +- FixedString(4) Ų ج_ 
@@ -163,6 +177,7 @@ w W  T +- String String String @@ -183,6 +198,7 @@ String \0g \0> \0XjbW:s< +- Nullable(String) )/VC)%f9 \0ih|;B @@ -194,6 +210,7 @@ Nullable(String) \0g \0> \0XjbW:s< +- Array(String) ['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] ['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] @@ -205,3 +222,19 @@ Array(String) ['\0k','\0 '] ['\0F','\0&h -210924.4634 w 2.143274805821858e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','8d6df922-9588-7e50-678f-236036acd439') w +[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 2.9425818885529257e307 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','bdb39798-af19-13ad-9780-b53edc0f4a21') \r +- From 659717638835c21d16f3670776ac2ae0b07e0961 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 30 Jan 2020 23:12:00 +0300 Subject: [PATCH 045/215] Added draft version of random table function. Currently unimplemented data generators --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 ++ .../src/DataTypes/DataTypeAggregateFunction.h | 1 + dbms/src/DataTypes/DataTypeArray.cpp | 7 ++ dbms/src/DataTypes/DataTypeArray.h | 1 + dbms/src/DataTypes/DataTypeDecimalBase.cpp | 7 ++ dbms/src/DataTypes/DataTypeDecimalBase.h | 1 + dbms/src/DataTypes/DataTypeEnum.h | 1 + dbms/src/DataTypes/DataTypeFixedString.cpp | 6 ++ dbms/src/DataTypes/DataTypeFixedString.h | 1 + dbms/src/DataTypes/DataTypeLowCardinality.cpp | 6 ++ dbms/src/DataTypes/DataTypeLowCardinality.h | 1 + dbms/src/DataTypes/DataTypeNothing.cpp | 8 +++ dbms/src/DataTypes/DataTypeNothing.h | 1 + dbms/src/DataTypes/DataTypeNullable.cpp | 5 ++ dbms/src/DataTypes/DataTypeNullable.h | 1 + dbms/src/DataTypes/DataTypeNumberBase.cpp | 7 ++ dbms/src/DataTypes/DataTypeNumberBase.h | 1 + dbms/src/DataTypes/DataTypeSet.h | 1 + dbms/src/DataTypes/DataTypeString.cpp | 5 ++ dbms/src/DataTypes/DataTypeString.h | 1 + dbms/src/DataTypes/DataTypeTuple.cpp | 8 
+++ dbms/src/DataTypes/DataTypeTuple.h | 1 + dbms/src/DataTypes/IDataType.h | 4 ++ dbms/src/DataTypes/IDataTypeDummy.h | 5 ++ .../TableFunctions/TableFunctionRandom.cpp | 69 +++++++++++++++++++ dbms/src/TableFunctions/TableFunctionRandom.h | 20 ++++++ .../TableFunctions/registerTableFunctions.cpp | 1 + .../TableFunctions/registerTableFunctions.h | 1 + .../01072_random_table_function.sql | 1 + 29 files changed, 176 insertions(+) create mode 100644 dbms/src/TableFunctions/TableFunctionRandom.cpp create mode 100644 dbms/src/TableFunctions/TableFunctionRandom.h create mode 100644 dbms/tests/queries/0_stateless/01072_random_table_function.sql diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 8111b1de2fe..f3b26497912 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -304,6 +304,10 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const return ColumnAggregateFunction::create(function); } +MutableColumnPtr DataTypeAggregateFunction::createColumnWithRandomData(size_t) const +{ + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} /// Create empty state Field DataTypeAggregateFunction::getDefault() const diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index 9ae7c67a803..e4c226b2917 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -63,6 +63,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index 
e2c03805ea8..0500182c61a 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -487,6 +487,13 @@ MutableColumnPtr DataTypeArray::createColumn() const } +MutableColumnPtr DataTypeArray::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + Field DataTypeArray::getDefault() const { return Array(); diff --git a/dbms/src/DataTypes/DataTypeArray.h b/dbms/src/DataTypes/DataTypeArray.h index 1451f27dfbe..ccf269bd357 100644 --- a/dbms/src/DataTypes/DataTypeArray.h +++ b/dbms/src/DataTypes/DataTypeArray.h @@ -94,6 +94,7 @@ public: bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.cpp b/dbms/src/DataTypes/DataTypeDecimalBase.cpp index 7b9a391427c..a0f2bd7bd82 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.cpp +++ b/dbms/src/DataTypes/DataTypeDecimalBase.cpp @@ -41,6 +41,13 @@ MutableColumnPtr DataTypeDecimalBase::createColumn() const return ColumnType::create(0, scale); } +template +MutableColumnPtr DataTypeDecimalBase::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + template void DataTypeDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const { diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h index 11f7490e80a..d579b965412 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.h +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -83,6 +83,7 @@ public: Field getDefault() const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr 
createColumnWithRandomData(size_t) const override; bool isParametric() const override { return true; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index 2cb677984df..a0408df0279 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -111,6 +111,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override { return ColumnType::create(); } + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index d30f1003ca0..a148d0b2d22 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -268,6 +268,12 @@ MutableColumnPtr DataTypeFixedString::createColumn() const return ColumnFixedString::create(n); } +MutableColumnPtr DataTypeFixedString::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + Field DataTypeFixedString::getDefault() const { return String(); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 6d1f1c4db83..4f264d3ac86 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -70,6 +70,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git 
a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 5db32bd5380..24dc3af48c9 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -934,6 +934,12 @@ MutableColumnPtr DataTypeLowCardinality::createColumn() const return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes)); } +MutableColumnPtr DataTypeLowCardinality::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + Field DataTypeLowCardinality::getDefault() const { return dictionary_type->getDefault(); diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index f8c314909b8..9b22acea7e3 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -68,6 +68,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNothing.cpp b/dbms/src/DataTypes/DataTypeNothing.cpp index 79fbb002bff..ce4990748f9 100644 --- a/dbms/src/DataTypes/DataTypeNothing.cpp +++ b/dbms/src/DataTypes/DataTypeNothing.cpp @@ -14,6 +14,14 @@ MutableColumnPtr DataTypeNothing::createColumn() const return ColumnNothing::create(0); } + +MutableColumnPtr DataTypeNothing::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + void DataTypeNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { size_t size = 
column.size(); diff --git a/dbms/src/DataTypes/DataTypeNothing.h b/dbms/src/DataTypes/DataTypeNothing.h index e9421fb15e8..5fbe0acc0a9 100644 --- a/dbms/src/DataTypes/DataTypeNothing.h +++ b/dbms/src/DataTypes/DataTypeNothing.h @@ -19,6 +19,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Nothing; } MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; /// These methods read and write zero bytes just to allow to figure out size of column. void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 397d5ba0a65..6f31e66a1e5 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -488,6 +488,11 @@ MutableColumnPtr DataTypeNullable::createColumn() const return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create()); } +MutableColumnPtr DataTypeNullable::createColumnWithRandomData(size_t limit) const +{ + return ColumnNullable::create(nested_data_type->createColumnWithRandomData(limit), DataTypeUInt8().createColumnWithRandomData(limit)); +} + Field DataTypeNullable::getDefault() const { return Null(); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 1766b399c2a..83a76ae0410 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -76,6 +76,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 90356817730..937967d431a 100644 --- 
a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -239,6 +239,13 @@ MutableColumnPtr DataTypeNumberBase::createColumn() const return ColumnVector::create(); } +template +MutableColumnPtr DataTypeNumberBase::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + template bool DataTypeNumberBase::isValueRepresentedByInteger() const { diff --git a/dbms/src/DataTypes/DataTypeNumberBase.h b/dbms/src/DataTypes/DataTypeNumberBase.h index fb752ad5329..5a3dda5fe15 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.h +++ b/dbms/src/DataTypes/DataTypeNumberBase.h @@ -45,6 +45,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return false; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index 7ef0d931279..1d0d56c164b 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,6 +21,7 @@ public: // Used for expressions analysis. 
MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } + MutableColumnPtr createColumnWithRandomData(size_t) const override; // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index ef32fe33690..46478396a68 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -360,6 +360,11 @@ MutableColumnPtr DataTypeString::createColumn() const return ColumnString::create(); } +MutableColumnPtr DataTypeString::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} bool DataTypeString::equals(const IDataType & rhs) const { diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 28968eef3f1..4a2c6be42e1 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -54,6 +54,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 4d60177aa4d..5c912b89f2d 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -454,6 +454,14 @@ MutableColumnPtr DataTypeTuple::createColumn() const return ColumnTuple::create(std::move(tuple_columns)); } + +MutableColumnPtr DataTypeTuple::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + Field 
DataTypeTuple::getDefault() const { return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); diff --git a/dbms/src/DataTypes/DataTypeTuple.h b/dbms/src/DataTypes/DataTypeTuple.h index 06f0f62026e..a3a8fb2847e 100644 --- a/dbms/src/DataTypes/DataTypeTuple.h +++ b/dbms/src/DataTypes/DataTypeTuple.h @@ -81,6 +81,7 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & reader, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 2f9f113b955..6c4ea791981 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -287,6 +287,10 @@ public: */ virtual MutableColumnPtr createColumn() const = 0; + /** Create column for corresponding type and fill with random values. + */ + virtual MutableColumnPtr createColumnWithRandomData(size_t size) const = 0; + /** Create ColumnConst for corresponding type, with specified size and value. 
*/ ColumnPtr createColumnConst(size_t size, const Field & field) const; diff --git a/dbms/src/DataTypes/IDataTypeDummy.h b/dbms/src/DataTypes/IDataTypeDummy.h index f27359e5f74..e346689274f 100644 --- a/dbms/src/DataTypes/IDataTypeDummy.h +++ b/dbms/src/DataTypes/IDataTypeDummy.h @@ -42,6 +42,11 @@ public: throw Exception("Method createColumn() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + MutableColumnPtr createColumnWithRandomData(size_t) const override + { + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + Field getDefault() const override { throw Exception("Method getDefault() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp new file mode 100644 index 00000000000..f7ffe977698 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -0,0 +1,69 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + + ASTs & args = args_func.at(0)->children; + + if (args.size() > 2) + throw Exception("Table function '" + getName() + "' requires one or two arguments: structure (and limit).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// Parsing first argument as table structure and creating a sample block + std::string structure = 
args[0]->as().value.safeGet(); + + UInt64 limit = 1; + /// Parsing second argument if present + if (args.size() == 2) + limit = args[1]->as().value.safeGet(); + + if (!limit) + throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); + + ColumnsDescription columns = parseColumnsListFromString(structure, context); + + Block res_block; + for (const auto & name_type : columns.getOrdinary()) + Column c = name_type.type->createColumnWithRandomData(limit) ; + res_block.insert({ c, name_type.type, name_type.name }); + + auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); + res->startup(); + return res; +} + +void registerTableFunctionRandom(TableFunctionFactory & factory) +{ + factory.registerFunction(TableFunctionFactory::CaseInsensitive); +} + +} diff --git a/dbms/src/TableFunctions/TableFunctionRandom.h b/dbms/src/TableFunctions/TableFunctionRandom.h new file mode 100644 index 00000000000..c4f8e2bca37 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionRandom.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ +/* random(structure, limit) - creates a temporary storage filling columns with random data + * random is case-insensitive table function + */ +class TableFunctionRandom : public ITableFunction +{ +public: + static constexpr auto name = "generate"; + std::string getName() const override { return name; } +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override; +}; + + +} diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 35021cd46d0..91b6b94440c 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -15,6 +15,7 @@ void registerTableFunctions() registerTableFunctionURL(factory); registerTableFunctionValues(factory); 
registerTableFunctionInput(factory); + registerTableFunctionRandom(factory); #if USE_AWS_S3 registerTableFunctionS3(factory); diff --git a/dbms/src/TableFunctions/registerTableFunctions.h b/dbms/src/TableFunctions/registerTableFunctions.h index 66f2dda90ea..8ae5ab339f4 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.h +++ b/dbms/src/TableFunctions/registerTableFunctions.h @@ -12,6 +12,7 @@ void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); void registerTableFunctionValues(TableFunctionFactory & factory); void registerTableFunctionInput(TableFunctionFactory & factory); +void registerTableFunctionRandom(TableFunctionFactory & factory); #if USE_AWS_S3 void registerTableFunctionS3(TableFunctionFactory & factory); diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql new file mode 100644 index 00000000000..fb217befea5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -0,0 +1 @@ +SELECT * FROM random(3) From 7a4143ddc3268ade09d9bfa8d976361a163806b8 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 31 Jan 2020 09:36:29 +0300 Subject: [PATCH 046/215] fix --- dbms/src/TableFunctions/TableFunctionRandom.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index f7ffe977698..b68bde17550 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "registerTableFunctions.h" @@ -44,7 +45,7 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C UInt64 limit = 1; /// Parsing second argument if present if (args.size() == 2) - limit = 
args[1]->as().value.safeGet(); + limit = args[1]->as().value.safeGet(); if (!limit) throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); @@ -53,8 +54,8 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) - Column c = name_type.type->createColumnWithRandomData(limit) ; - res_block.insert({ c, name_type.type, name_type.name }); + MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); + res_block.insert({ column, name_type.type, name_type.name }); auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); From 11c748d91bc6f210343e4632311ea73c4e58911f Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 31 Jan 2020 10:58:41 +0300 Subject: [PATCH 047/215] build fix --- dbms/src/DataTypes/DataTypeEnum.cpp | 8 ++++++++ dbms/src/DataTypes/DataTypeSet.h | 5 ++++- dbms/src/TableFunctions/TableFunctionRandom.cpp | 4 +++- .../queries/0_stateless/01072_random_table_function.sql | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 5ca6296f43d..fcaf9f2c8a3 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -347,6 +347,14 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const } +template +MutableColumnPtr DataTypeEnum::createColumnWithRandomData(size_t limit) const +{ + (void)limit; + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + /// Explicit instantiations. 
template class DataTypeEnum; template class DataTypeEnum; diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index 1d0d56c164b..f468881cfe9 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,7 +21,10 @@ public: // Used for expressions analysis. MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } - MutableColumnPtr createColumnWithRandomData(size_t) const override; + MutableColumnPtr createColumnWithRandomData(size_t) const override + { + throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index b68bde17550..3d4bb1d3247 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -54,8 +54,10 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) + { MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); - res_block.insert({ column, name_type.type, name_type.name }); + res_block.insert({std::move(column), name_type.type, name_type.name}); + } auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index fb217befea5..21f0925439d 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -1 +1 @@ -SELECT * FROM random(3) +SELECT * FROM generate('id int', 3) From 5fae4f88a4bd4958f7d61f74754f113f241a4f34 Mon Sep 17 
00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Sun, 2 Feb 2020 00:22:00 +0300 Subject: [PATCH 048/215] implemented couple of generators --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 - .../src/DataTypes/DataTypeAggregateFunction.h | 1 - dbms/src/DataTypes/DataTypeArray.cpp | 7 - dbms/src/DataTypes/DataTypeArray.h | 1 - dbms/src/DataTypes/DataTypeDecimalBase.cpp | 7 - dbms/src/DataTypes/DataTypeDecimalBase.h | 1 - dbms/src/DataTypes/DataTypeEnum.cpp | 8 - dbms/src/DataTypes/DataTypeEnum.h | 1 - dbms/src/DataTypes/DataTypeFixedString.cpp | 6 - dbms/src/DataTypes/DataTypeFixedString.h | 1 - dbms/src/DataTypes/DataTypeInterval.h | 2 +- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 6 - dbms/src/DataTypes/DataTypeLowCardinality.h | 1 - dbms/src/DataTypes/DataTypeNothing.cpp | 8 - dbms/src/DataTypes/DataTypeNothing.h | 1 - dbms/src/DataTypes/DataTypeNullable.cpp | 5 - dbms/src/DataTypes/DataTypeNullable.h | 1 - dbms/src/DataTypes/DataTypeNumberBase.cpp | 7 - dbms/src/DataTypes/DataTypeNumberBase.h | 1 - dbms/src/DataTypes/DataTypeSet.h | 4 - dbms/src/DataTypes/DataTypeString.cpp | 5 - dbms/src/DataTypes/DataTypeString.h | 1 - dbms/src/DataTypes/DataTypeTuple.cpp | 8 - dbms/src/DataTypes/DataTypeTuple.h | 1 - dbms/src/DataTypes/IDataType.h | 4 - dbms/src/DataTypes/IDataTypeDummy.h | 5 - .../TableFunctions/TableFunctionRandom.cpp | 228 +++++++++++++++++- .../01072_random_table_function.sql | 46 +++- 28 files changed, 273 insertions(+), 98 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index f3b26497912..8111b1de2fe 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -304,10 +304,6 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const return ColumnAggregateFunction::create(function); } -MutableColumnPtr DataTypeAggregateFunction::createColumnWithRandomData(size_t) const -{ - 
throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} /// Create empty state Field DataTypeAggregateFunction::getDefault() const diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index e4c226b2917..9ae7c67a803 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -63,7 +63,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index 0500182c61a..e2c03805ea8 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -487,13 +487,6 @@ MutableColumnPtr DataTypeArray::createColumn() const } -MutableColumnPtr DataTypeArray::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - Field DataTypeArray::getDefault() const { return Array(); diff --git a/dbms/src/DataTypes/DataTypeArray.h b/dbms/src/DataTypes/DataTypeArray.h index ccf269bd357..1451f27dfbe 100644 --- a/dbms/src/DataTypes/DataTypeArray.h +++ b/dbms/src/DataTypes/DataTypeArray.h @@ -94,7 +94,6 @@ public: bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.cpp b/dbms/src/DataTypes/DataTypeDecimalBase.cpp index a0f2bd7bd82..7b9a391427c 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.cpp +++ 
b/dbms/src/DataTypes/DataTypeDecimalBase.cpp @@ -41,13 +41,6 @@ MutableColumnPtr DataTypeDecimalBase::createColumn() const return ColumnType::create(0, scale); } -template -MutableColumnPtr DataTypeDecimalBase::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - template void DataTypeDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const { diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h index d579b965412..11f7490e80a 100644 --- a/dbms/src/DataTypes/DataTypeDecimalBase.h +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -83,7 +83,6 @@ public: Field getDefault() const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return true; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index fcaf9f2c8a3..5ca6296f43d 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -347,14 +347,6 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const } -template -MutableColumnPtr DataTypeEnum::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - /// Explicit instantiations. 
template class DataTypeEnum; template class DataTypeEnum; diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index a0408df0279..2cb677984df 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -111,7 +111,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override { return ColumnType::create(); } - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index a148d0b2d22..d30f1003ca0 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -268,12 +268,6 @@ MutableColumnPtr DataTypeFixedString::createColumn() const return ColumnFixedString::create(n); } -MutableColumnPtr DataTypeFixedString::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - Field DataTypeFixedString::getDefault() const { return String(); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 4f264d3ac86..6d1f1c4db83 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -70,7 +70,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeInterval.h b/dbms/src/DataTypes/DataTypeInterval.h index 111a2489d65..d66b329185d 100644 --- 
a/dbms/src/DataTypes/DataTypeInterval.h +++ b/dbms/src/DataTypes/DataTypeInterval.h @@ -12,7 +12,7 @@ namespace DB * Mostly the same as Int64. * But also tagged with interval kind. * - * Intended isage is for temporary elements in expressions, + * Intended usage is for temporary elements in expressions, * not for storing values in tables. */ class DataTypeInterval final : public DataTypeNumberBase diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 24dc3af48c9..5db32bd5380 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -934,12 +934,6 @@ MutableColumnPtr DataTypeLowCardinality::createColumn() const return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes)); } -MutableColumnPtr DataTypeLowCardinality::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - Field DataTypeLowCardinality::getDefault() const { return dictionary_type->getDefault(); diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 9b22acea7e3..f8c314909b8 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -68,7 +68,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNothing.cpp b/dbms/src/DataTypes/DataTypeNothing.cpp index ce4990748f9..79fbb002bff 100644 --- a/dbms/src/DataTypes/DataTypeNothing.cpp +++ b/dbms/src/DataTypes/DataTypeNothing.cpp @@ -14,14 +14,6 @@ MutableColumnPtr DataTypeNothing::createColumn() const return 
ColumnNothing::create(0); } - -MutableColumnPtr DataTypeNothing::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - void DataTypeNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { size_t size = column.size(); diff --git a/dbms/src/DataTypes/DataTypeNothing.h b/dbms/src/DataTypes/DataTypeNothing.h index 5fbe0acc0a9..e9421fb15e8 100644 --- a/dbms/src/DataTypes/DataTypeNothing.h +++ b/dbms/src/DataTypes/DataTypeNothing.h @@ -19,7 +19,6 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Nothing; } MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; /// These methods read and write zero bytes just to allow to figure out size of column. void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 6f31e66a1e5..397d5ba0a65 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -488,11 +488,6 @@ MutableColumnPtr DataTypeNullable::createColumn() const return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create()); } -MutableColumnPtr DataTypeNullable::createColumnWithRandomData(size_t limit) const -{ - return ColumnNullable::create(nested_data_type->createColumnWithRandomData(limit), DataTypeUInt8().createColumnWithRandomData(limit)); -} - Field DataTypeNullable::getDefault() const { return Null(); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 83a76ae0410..1766b399c2a 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -76,7 +76,6 @@ public: void 
deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 937967d431a..90356817730 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -239,13 +239,6 @@ MutableColumnPtr DataTypeNumberBase::createColumn() const return ColumnVector::create(); } -template -MutableColumnPtr DataTypeNumberBase::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - template bool DataTypeNumberBase::isValueRepresentedByInteger() const { diff --git a/dbms/src/DataTypes/DataTypeNumberBase.h b/dbms/src/DataTypes/DataTypeNumberBase.h index 5a3dda5fe15..fb752ad5329 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.h +++ b/dbms/src/DataTypes/DataTypeNumberBase.h @@ -45,7 +45,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; bool isParametric() const override { return false; } bool haveSubtypes() const override { return false; } diff --git a/dbms/src/DataTypes/DataTypeSet.h b/dbms/src/DataTypes/DataTypeSet.h index f468881cfe9..7ef0d931279 100644 --- a/dbms/src/DataTypes/DataTypeSet.h +++ b/dbms/src/DataTypes/DataTypeSet.h @@ -21,10 +21,6 @@ public: // Used for expressions analysis. 
MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); } - MutableColumnPtr createColumnWithRandomData(size_t) const override - { - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } // Used only for debugging, making it DUMPABLE Field getDefault() const override { return Tuple(); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 46478396a68..ef32fe33690 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -360,11 +360,6 @@ MutableColumnPtr DataTypeString::createColumn() const return ColumnString::create(); } -MutableColumnPtr DataTypeString::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} bool DataTypeString::equals(const IDataType & rhs) const { diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 4a2c6be42e1..28968eef3f1 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -54,7 +54,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 5c912b89f2d..4d60177aa4d 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -454,14 +454,6 @@ MutableColumnPtr DataTypeTuple::createColumn() const return ColumnTuple::create(std::move(tuple_columns)); } - -MutableColumnPtr DataTypeTuple::createColumnWithRandomData(size_t limit) const -{ - (void)limit; - throw Exception("Method 
createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - Field DataTypeTuple::getDefault() const { return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); diff --git a/dbms/src/DataTypes/DataTypeTuple.h b/dbms/src/DataTypes/DataTypeTuple.h index a3a8fb2847e..06f0f62026e 100644 --- a/dbms/src/DataTypes/DataTypeTuple.h +++ b/dbms/src/DataTypes/DataTypeTuple.h @@ -81,7 +81,6 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & reader, bool allow_add_row, bool & row_added) const override; MutableColumnPtr createColumn() const override; - MutableColumnPtr createColumnWithRandomData(size_t) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 6c4ea791981..2f9f113b955 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -287,10 +287,6 @@ public: */ virtual MutableColumnPtr createColumn() const = 0; - /** Create column for corresponding type and fill with random values. - */ - virtual MutableColumnPtr createColumnWithRandomData(size_t size) const = 0; - /** Create ColumnConst for corresponding type, with specified size and value. 
*/ ColumnPtr createColumnConst(size_t size, const Field & field) const; diff --git a/dbms/src/DataTypes/IDataTypeDummy.h b/dbms/src/DataTypes/IDataTypeDummy.h index e346689274f..f27359e5f74 100644 --- a/dbms/src/DataTypes/IDataTypeDummy.h +++ b/dbms/src/DataTypes/IDataTypeDummy.h @@ -42,11 +42,6 @@ public: throw Exception("Method createColumn() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - MutableColumnPtr createColumnWithRandomData(size_t) const override - { - throw Exception("Method createColumnWithRandomData() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - Field getDefault() const override { throw Exception("Method getDefault() is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index 3d4bb1d3247..1391ecdc74b 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -4,11 +4,17 @@ #include #include #include +#include +#include + #include #include #include +#include +#include + #include #include #include @@ -24,6 +30,226 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; + extern const int BAD_TYPE_OF_FIELD; + extern const int LOGICAL_ERROR; +} + +MutableColumnPtr createColumnWithRandomData(DataTypePtr type, UInt64 limit) +{ + TypeIndex idx = type->getTypeId(); + MutableColumnPtr column = type->createColumn(); + + switch (idx) + { + case TypeIndex::Nothing: + for (UInt64 i = 0; i < limit; ++i) + { + column->insertDefault(); + } + throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::UInt8: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt16: + { + pcg32 
generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::UInt128: + throw Exception("Random Generator not implemented for type 'UInt128'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Int8: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int16: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Int128: + throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Float32: + { + pcg32 generator(randomSeed()); + double d; + for (UInt64 i = 0; i < limit; ++i) + { + d = std::numeric_limits::max(); + column->insert( (d / pcg32::max()) * generator() ); + } + } + break; + case TypeIndex::Float64: + { + pcg64 generator(randomSeed()); + double d; + for (UInt64 i = 0; i < limit; ++i) + { + d = std::numeric_limits::max(); + column->insert( (d / pcg64::max()) * generator() ); + } + } + break; + case TypeIndex::Date: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + 
break; + case TypeIndex::DateTime: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::DateTime64: + { + UInt32 scale; + if (auto * ptype = typeid_cast(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + UInt32 fractional = static_cast(generator()) % intExp10(scale); + UInt32 whole = static_cast(generator()); + DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); + column->insert(DecimalField(dt, scale)); + } + } + break; + case TypeIndex::String: + throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::FixedString: + throw Exception("Random Generator not implemented for type 'FixedString'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Enum8: + throw Exception("Random Generator not implemented for type 'Enum8'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Enum16: + throw Exception("Random Generator not implemented for type 'Enum16'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Decimal32: + { + pcg32 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Decimal64: + { + pcg64 generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) + { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Decimal128: + throw Exception("Random Generator not implemented for type 'Decimal128'.", ErrorCodes::NOT_IMPLEMENTED); +/* + { + UInt32 scale = 0; + if (auto * ptype = typeid_cast *>(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to Decimal128 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + + pcg128_once_insecure generator(randomSeed()); + 
for (UInt64 i = 0; i < limit; ++i) + { + column->insert(DecimalField(static_cast(generator()), scale)); + } + } + break; +*/ + case TypeIndex::UUID: + { + pcg128_once_insecure generator(randomSeed()); + for (UInt64 i = 0; i < limit; ++i) { + column->insert(static_cast(generator())); + } + } + break; + case TypeIndex::Array: + throw Exception("Random Generator not implemented for type 'Array'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Tuple: + throw Exception("Random Generator not implemented for type 'Tuple'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Set: + throw Exception("Random Generator not implemented for type 'Set'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Interval: + throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); + case TypeIndex::Nullable: + throw Exception("Random Generator not implemented for type 'Nullable'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::Function: + throw Exception("Random Generator not implemented for type 'Function'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::AggregateFunction: + throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); + case TypeIndex::LowCardinality: + throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED); + } + return column; } StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const @@ -55,7 +281,7 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C Block res_block; for (const auto & name_type : columns.getOrdinary()) { - MutableColumnPtr column = name_type.type->createColumnWithRandomData(limit); + MutableColumnPtr column = createColumnWithRandomData(name_type.type, limit); res_block.insert({std::move(column), name_type.type, name_type.name}); } diff --git 
a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index 21f0925439d..c81d630d9b5 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -1 +1,45 @@ -SELECT * FROM generate('id int', 3) +SELECT +toTypeName(ui64), toTypeName(i64), +toTypeName(ui32), toTypeName(i32), +toTypeName(ui16), toTypeName(i16), +toTypeName(ui8), toTypeName(i8) +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1); +SELECT +ui64, i64, +ui32, i32, +ui16, i16, +ui8, i8 +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10); +SELECT +toTypeName(d), toTypeName(dt), toTypeName(dtm) +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 1); +SELECT +d, dt, dtm +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10) FORMAT JSONEachRow;; +SELECT +toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1); +SELECT +dt64, dts64, dtms64 +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10) FORMAT JSONEachRow; +SELECT +dt64, dts64, dtms64 +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10); +SELECT + toTypeName(f32), toTypeName(f64) +FROM generate('f32 Float32, f64 Float64', 1); +SELECT + f32, f64 +FROM generate('f32 Float32, f64 Float64', 10) FORMAT JSONEachRow; +SELECT + toTypeName(d32), toTypeName(d64) +FROM generate('d32 Decimal32(4), d64 Decimal64(8)', 1); +SELECT + d32, d64 +FROM generate('d32 Decimal32(4), d64 Decimal64(8)', 10) FORMAT JSONEachRow; +SELECT + toTypeName(i) +FROM generate('i Interval', 10); +SELECT + i +FROM generate('i Interval', 10) FORMAT JSONEachRow; From 
907037bcca01bd56ab7bbcc399c26158af0fde8e Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 13:59:26 +0300 Subject: [PATCH 049/215] Fix everything, add docs. --- dbms/src/DataTypes/DataTypeNumberBase.cpp | 2 +- .../TableFunctions/TableFunctionRandom.cpp | 440 ++++++++++++------ .../01072_random_table_function.reference | 207 ++++++++ .../01072_random_table_function.sql | 133 +++++- .../table_functions/generate.md | 38 ++ .../table_functions/generate.md | 1 + .../table_functions/generate.md | 37 ++ .../table_functions/generate.md | 1 + 8 files changed, 705 insertions(+), 154 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01072_random_table_function.reference create mode 100644 docs/en/query_language/table_functions/generate.md create mode 120000 docs/ja/query_language/table_functions/generate.md create mode 100644 docs/ru/query_language/table_functions/generate.md create mode 120000 docs/zh/query_language/table_functions/generate.md diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 90356817730..ce01269bc4d 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -257,7 +257,7 @@ template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; -template class DataTypeNumberBase; +template class DataTypeNumberBase; // used only in UUID template class DataTypeNumberBase; template class DataTypeNumberBase; template class DataTypeNumberBase; diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index 1391ecdc74b..d70c8a73c63 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -4,8 +4,17 @@ #include #include #include +#include +#include #include #include +#include +#include +#include +#include +#include 
+#include +#include #include @@ -34,222 +43,355 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -MutableColumnPtr createColumnWithRandomData(DataTypePtr type, UInt64 limit) +void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, + UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed) { TypeIndex idx = type->getTypeId(); - MutableColumnPtr column = type->createColumn(); + if (!random_seed) + random_seed = randomSeed(); + (void) max_string_length; switch (idx) { case TypeIndex::Nothing: - for (UInt64 i = 0; i < limit; ++i) - { - column->insertDefault(); - } throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::UInt8: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt16: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - 
column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::UInt128: - throw Exception("Random Generator not implemented for type 'UInt128'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("There is no DataType 'UInt128' support.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::Int8: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int16: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::Int128: - throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("There is no DataType 'Int128' support.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::Float32: + { + auto & data = typeid_cast 
&>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + double d = 1.0; + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - double d; - for (UInt64 i = 0; i < limit; ++i) - { - d = std::numeric_limits::max(); - column->insert( (d / pcg32::max()) * generator() ); - } + d = std::numeric_limits::max(); + data[i] = (d / pcg32::max()) * generator(); } break; + } case TypeIndex::Float64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + double d = 1.0; + for (UInt64 i = 0; i < limit; ++i) { - pcg64 generator(randomSeed()); - double d; - for (UInt64 i = 0; i < limit; ++i) - { - d = std::numeric_limits::max(); - column->insert( (d / pcg64::max()) * generator() ); - } + d = std::numeric_limits::max(); + data[i] = (d / pcg64::max()) * generator(); } break; + } case TypeIndex::Date: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::DateTime: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } + data[i] = static_cast(generator()); } break; + } case TypeIndex::DateTime64: + { + UInt32 scale; + if (auto * ptype = typeid_cast(type.get())) + scale = ptype->getScale(); + else + throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { - UInt32 scale; - if (auto * ptype = typeid_cast(type.get())) - scale = 
ptype->getScale(); - else - throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - UInt32 fractional = static_cast(generator()) % intExp10(scale); - UInt32 whole = static_cast(generator()); - DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); - column->insert(DecimalField(dt, scale)); - } + UInt32 fractional = static_cast(generator()) % intExp10(scale); + UInt32 whole = static_cast(generator()); + DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); + data[i] = dt; } break; + } case TypeIndex::String: - throw Exception("Random Generator not implemented for type '" + String(TypeName::get()) + "'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::FixedString: - throw Exception("Random Generator not implemented for type 'FixedString'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Enum8: - throw Exception("Random Generator not implemented for type 'Enum8'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Enum16: - throw Exception("Random Generator not implemented for type 'Enum16'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Decimal32: - { - pcg32 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } - } - break; - case TypeIndex::Decimal64: - { - pcg64 generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(static_cast(generator())); - } - } - break; - case TypeIndex::Decimal128: - throw Exception("Random Generator not implemented for type 'Decimal128'.", ErrorCodes::NOT_IMPLEMENTED); -/* - { - UInt32 scale = 0; - if (auto * ptype = typeid_cast *>(type.get())) - scale = ptype->getScale(); - else - throw Exception("Static cast to Decimal128 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); + { + auto & column_string = typeid_cast(column); + auto & offsets = column_string.getOffsets(); + auto & chars = 
column_string.getChars(); - pcg128_once_insecure generator(randomSeed()); - for (UInt64 i = 0; i < limit; ++i) - { - column->insert(DecimalField(static_cast(generator()), scale)); - } - } - break; -*/ - case TypeIndex::UUID: + UInt64 offset = 0; { - pcg128_once_insecure generator(randomSeed()); + pcg32 generator(random_seed); + offsets.resize(limit); for (UInt64 i = 0; i < limit; ++i) { - column->insert(static_cast(generator())); + offset += 1 + static_cast(generator()) % max_string_length; + offsets[i] = offset - 1; + } + chars.resize(offset); + for (UInt64 i = 0; i < offset; ++i) { + chars[i] = 32 + generator() % 95; + } + // add terminating zero char + for (auto & i : offsets) + { + chars[i] = 0; } } break; + } + case TypeIndex::FixedString: + { + auto & column_string = typeid_cast(column); + size_t len = column_string.sizeOfValueIfFixed(); + auto & chars = column_string.getChars(); + + UInt64 num_chars = static_cast(len) * limit; + { + pcg32 generator(random_seed); + chars.resize(num_chars); + for (UInt64 i = 0; i < num_chars; ++i) { + chars[i] = static_cast(generator()); + } + } + break; + } + case TypeIndex::Enum8: + { + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + + UInt8 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(generator()) % size; + data[i] = values[off].second; + } + break; + } + case TypeIndex::Enum16: + { + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + + UInt16 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(generator()) % size; + data[i] = values[off].second; + } + break; + } + case TypeIndex::Decimal32: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg32 generator(random_seed); + for (UInt64 
i = 0; i < limit; ++i) + { + data[i] = static_cast(generator()); + } + break; + } + case TypeIndex::Decimal64: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) + { + data[i] = static_cast(generator()); + } + break; + } + case TypeIndex::Decimal128: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) + { + Int128 x = static_cast(generator()) << 64 | static_cast(generator()); + data[i] = x; + } + } + break; + case TypeIndex::UUID: + { + auto & data = typeid_cast &>(column).getData(); + data.resize(limit); + pcg64 generator(random_seed); + for (UInt64 i = 0; i < limit; ++i) { + auto x = UInt128(generator(), generator()); + data[i] = x; + } + } + break; case TypeIndex::Array: - throw Exception("Random Generator not implemented for type 'Array'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto & column_array = typeid_cast(column); + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto & offsets = column_array.getOffsets(); + IColumn & data = column_array.getData(); + + UInt64 offset = 0; + { + pcg32 generator(random_seed); + offsets.resize(limit); + for (UInt64 i = 0; i < limit; ++i) { + offset += static_cast(generator()) % max_array_length; + offsets[i] = offset; + } + } + fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, random_seed); + break; + } case TypeIndex::Tuple: - throw Exception("Random Generator not implemented for type 'Tuple'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto &column_tuple = typeid_cast(column); + auto elements = typeid_cast(type.get())->getElements(); + + for (size_t i = 0; i < column_tuple.tupleSize(); ++i) + { + fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, random_seed); + } + break; + } case TypeIndex::Set: - throw Exception("Random Generator not 
implemented for type 'Set'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Type 'Set' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::Interval: throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::Nullable: - throw Exception("Random Generator not implemented for type 'Nullable'.", ErrorCodes::NOT_IMPLEMENTED); + { + auto & column_nullable = typeid_cast(column); + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto & null_map = column_nullable.getNullMapData(); + IColumn & nested_column = column_nullable.getNestedColumn(); + + fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, random_seed); + + pcg32 generator(random_seed); + null_map.resize(limit); + for (UInt64 i = 0; i < limit; ++i) { + null_map[i] = generator() < 1024; + } + break; + } case TypeIndex::Function: - throw Exception("Random Generator not implemented for type 'Function'.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Type 'Funclion' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::AggregateFunction: throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::LowCardinality: throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED); } - return column; } StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const @@ -261,30 +403,48 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C ASTs & args = args_func.at(0)->children; - if (args.size() > 2) - throw Exception("Table function '" + getName() + "' requires one or two arguments: structure (and limit).", + if (args.size() > 5) + throw Exception("Table function '" + getName() + "' requires at most five arguments: "\ + " structure, limit, 
max_array_length, max_string_length, random_seed.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// Parsing first argument as table structure and creating a sample block std::string structure = args[0]->as().value.safeGet(); UInt64 limit = 1; + UInt64 max_array_length = 10; + UInt64 max_string_length = 10; + UInt64 random_seed = 0; // zero for random + /// Parsing second argument if present - if (args.size() == 2) + if (args.size() >= 2) limit = args[1]->as().value.safeGet(); if (!limit) throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); + if (args.size() >= 3) + max_array_length = args[1]->as().value.safeGet(); + + if (args.size() >= 4) + max_string_length = args[1]->as().value.safeGet(); + + if (args.size() == 5) + random_seed = args[1]->as().value.safeGet(); + ColumnsDescription columns = parseColumnsListFromString(structure, context); Block res_block; for (const auto & name_type : columns.getOrdinary()) { - MutableColumnPtr column = createColumnWithRandomData(name_type.type, limit); + MutableColumnPtr column = name_type.type->createColumn(); res_block.insert({std::move(column), name_type.type, name_type.name}); } + for (auto & ctn : res_block.getColumnsWithTypeAndName()) + { + fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, limit, max_array_length, max_string_length, random_seed); + } auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); return res; diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference new file mode 100644 index 00000000000..93ea1861756 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -0,0 +1,207 @@ +Enum8(\'hello\' = 1, \'world\' = 5) +world +hello +world +hello +hello +hello +world +hello +hello +hello +Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) 
+['world','hello','world','hello','hello','hello','world','hello','hello'] +['hello','world','world','hello','world','world'] +['hello','world','hello','hello','world','world','world'] +['world','world','world','world','world','world','hello','hello'] +['world','hello'] +['hello','hello'] +['world'] +['hello','hello'] +['hello','hello'] +['hello','world','hello','hello','world','world','world','world'] +Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) +o +w +w +w +h +w +h +h +w +o +UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 +2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 +9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 +4555697903102013946 4555697903102013946 275100647 275100647 46055 -19481 231 -25 +5784362079052877875 5784362079052877875 1033685688 1033685688 51896 -13640 184 -72 +11035971995277520997 -7410772078432030619 180895192 180895192 15832 15832 216 -40 +7901646768096461004 7901646768096461004 135557292 135557292 28844 28844 172 -84 +6733841386518201279 6733841386518201279 716914271 716914271 15967 15967 95 95 +7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 +2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 +3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +Date DateTime DateTime(\'Europe/Moscow\') +2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 +2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 +2096-02-04 1978-09-20 03:50:47 1978-09-20 03:50:47 +2106-02-07 2002-10-04 02:54:48 2002-10-04 02:54:48 +2013-05-07 1975-09-25 19:39:52 1975-09-25 19:39:52 +2048-12-21 1974-04-19 01:48:12 1974-04-19 01:48:12 +2013-09-19 1992-09-19 18:51:11 1992-09-19 18:51:11 +1991-02-02 2002-01-28 12:47:02 2002-01-28 12:47:02 +2056-04-25 2039-04-06 20:11:02 2039-04-06 20:11:02 +2049-06-05 2053-11-20 07:10:58 2053-11-20 07:10:58 +DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') +2007-02-20 
10:43:46.989 2007-02-20 10:43:46.817989 2007-02-20 10:43:46.817989 +2002-10-04 02:54:48.647 2002-10-04 02:54:48.100647 2002-10-04 02:54:48.100647 +1974-04-19 01:48:12.192 1974-04-19 01:48:12.895192 1974-04-19 01:48:12.895192 +2002-01-28 12:47:02.271 2002-01-28 12:47:02.914271 2002-01-28 12:47:02.914271 +2053-11-20 07:10:58.662 2053-11-20 07:10:58.722662 2053-11-20 07:10:58.722662 +1986-04-08 19:07:15.849 1986-04-08 19:07:15.510849 1986-04-08 19:07:15.510849 +2081-03-06 04:00:55.914 2081-03-06 04:00:55.448914 2081-03-06 04:00:55.448914 +1979-01-20 20:39:20.939 1979-01-20 20:39:20.162939 1979-01-20 20:39:20.162939 +2063-07-18 01:46:10.215 2063-07-18 01:46:10.908215 2063-07-18 01:46:10.908215 +1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 +Float32 Float64 +9.783235e37 2.1973467205491123e307 +9.285203e37 8.887754501811354e307 +2.1795718e37 4.4396706606805647e307 +8.1897013e37 5.637042481600483e307 +1.4331993e37 1.07549012514996e308 +1.0739954e37 7.700402896226395e307 +5.67998e37 6.562339881458101e307 +8.019563e37 7.539520705557441e307 +1.7317079e38 2.143274805821858e307 +2.0973474e38 2.9425818885529257e307 +Decimal32(4) Decimal64(8) Decimal64(8) +123481.7989 22547726199.26532955 4159321346419233104838.6879832895010840 +117195.7426 91200288583.97505560 8403779329565810688767.7049545291714611 +27510.0647 45556979031.02013946 -13670461591942827725055.0250490776469300 +103368.5688 57843620790.52877875 12421744869005473959544.2499747955622051 +18089.5192 -74107720784.32030619 4056969511333950153663.4915186231430947 +13555.7292 79016467680.96461004 -8819413736166121578589.4583420666183888 +71691.4271 67338413865.18201279 13058329479868658041313.8432372419860363 +101221.1222 77365600500.27905187 -4693380431928321782727.0243506636623202 +-210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 +-164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 +UUID +1f4a8fc0-63ff-735b-7e90-d9ed3e183818 
+3f39171b-1263-31fa-5046-2ea9fe2fd033 +9927a60f-01ac-f065-6da8-49def100c0cc +5d736910-493d-c3bf-6b5d-c8601d6440a3 +1e857066-961d-be0e-29e7-5c9efd534f23 +bda66d4f-737b-3622-b60f-aa27fe38ff30 +623d6d82-4422-2885-297f-7b2fec54178b +dcb0e0ca-3a43-5f2e-556e-7945df65729e +678f2360-36ac-d439-8d6d-f92295887e50 +9780b53e-dc0f-4a21-bdb3-9798af1913ad +Tuple(Int32, Int64) +(1234817989,2254772619926532955) +(1171957426,9120028858397505560) +(275100647,4555697903102013946) +(1033685688,5784362079052877875) +(180895192,-7410772078432030619) +(135557292,7901646768096461004) +(716914271,6733841386518201279) +(1012211222,7736560050027905187) +(-2109244634,2199287578947862030) +(-1647742638,3019483913099890467) +Array(Int8) +[-59,-78,-25,-72,-40,-84,95,22,38] +[82,65,35,-110,-57,-69] +[72,119,-78,-58,13,39,-71] +[81,107,-11,-63,-59,69,-80,-122] +[87,-76] +[22,-84] +[-45] +[-40,84] +[-104,-86] +[-36,123,44,60,5,25,-5,-127] +Array(Nullable(Int32)) +[1234817989,1171957426,275100647,1033685688,180895192,135557292,716914271,1012211222,-2109244634] +[-1647742638,319510849,513356835,-1966518382,-786518841,269162939] +[285701960,1943908215,-1343029326,1474183110,846934541,1007818023,-1664171079] +[195050577,371018347,734173429,2001591233,-1812297275,1172704837,-728923984,774864518] +[-462583209,-1520633676] +[-638906858,1986832300] +[378774483] +[-1399152424,-953863084] +[733724312,-23652950] +[371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] +Array(Nullable(UUID)) +['1f4a8fc0-63ff-735b-7e90-d9ed3e183818','3f39171b-1263-31fa-5046-2ea9fe2fd033','9927a60f-01ac-f065-6da8-49def100c0cc','5d736910-493d-c3bf-6b5d-c8601d6440a3','1e857066-961d-be0e-29e7-5c9efd534f23','bda66d4f-737b-3622-b60f-aa27fe38ff30','623d6d82-4422-2885-297f-7b2fec54178b','dcb0e0ca-3a43-5f2e-556e-7945df65729e','678f2360-36ac-d439-8d6d-f92295887e50'] 
+['9780b53e-dc0f-4a21-bdb3-9798af1913ad','c79810de-3635-d333-5ca1-7a81ab302b25','1c756bca-4438-3f17-a766-c8bcbe3ba400','d9072738-ac93-7ed6-167b-3c3c66d35a18','b1e8dec2-de29-3c9f-aaf2-f78fd92df3ce','9cd25f9f-3c0d-f43d-5a46-0194f0be04dd'] +['10a4718d-ab8c-49c6-c785-66ccf112f7d5','02ac2bf5-5634-a5a8-9a18-05ce8d1fb583','8037a13d-2004-08f2-f831-fa2387f5c29a','a99c4373-1121-2691-ecbb-216adbd748c7','ef0986ff-5031-0353-2f21-1de3ea53af08','778064a7-653b-ef7b-c77b-4d769b12b917','a1607e6f-691a-0ff0-b0b3-e454dae7bef7'] +['71c1b47a-c0eb-42b5-eecd-18dc585284fd','72bbf272-9ec5-09ec-f339-b5dac55c037b','26e5bce5-43f7-59b0-84c6-ef509f4c45eb','305fcbff-c366-2033-a8c5-d648f236e754','3a0d329f-f897-84e9-9e87-9501a713e63d','54bda20c-d5cd-a08a-c078-3c4fd81f4f55','43f549d1-3e5b-d5bf-ed32-b4850648bdc8','7eb6ac4f-06e0-ff48-6330-3c7afa5f2644'] +['17b9a4a5-fef8-a3f9-5af4-3b6e67ca62c9','3f524d8e-320d-00dc-c210-e199206550db'] +['005c592e-5081-9f3d-1fcb-5a9e82f39f97','29cf228d-b325-4a34-3eff-e80494a79260'] +['6c08b54b-8cf8-b96d-f087-8b54f5e72d0e'] +['7122e162-ab8b-a84a-6b71-c0846cf0204d','51c1de1a-24c7-18d6-39ed-e9023205610c'] +['f09d6779-1106-d667-e7c9-9a0cad544afe','62060fec-ee13-7c66-5da4-02c8f4d50dc9'] +['df1d0d54-d639-9c9b-2070-622fc9d82203','f23ef5b9-3797-9b0e-b8ac-67ea31b99c3e','e48afe73-9e22-7439-afed-d53b6ea204f4','d7f1ab47-4928-7623-283e-fb3f16aebeba','ea270407-d32f-a407-add2-3ae2d1113ccb','c43e9fff-2980-a1d1-f1bb-ff94d3cffbc2','a0cd54e6-0a2d-07ec-88ad-4f5d29c15b06','5e93413f-2eb9-5363-17ab-e2215b8b19e0'] +Tuple(Int32, Array(Int64)) +(1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) +(1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) 
+(275100647,[6155991081669718686,7462222003717329977,-8255668614967296432,-7529819295378879967,-4777308097681484883,-4064480117123591373,6674750820081216293]) +(1033685688,[2050663721809231639,-6384194708780112896,-2808232718275215658,1619954721090656792,-5627002805867168609,-6128563945701772338,-7146544521171569603,6504888450989032669]) +(180895192,[1199208254069819846,-4069733657855461419]) +(135557292,[192577216783361448,-7343112807738526333]) +(716914271,[-9207713629233477390]) +(1012211222,[-562393447932771686,-6225026423445182831]) +(-2109244634,[-1388479317275096889,-1222297392734207149]) +(-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) +FixedString(4) +Ų +ج_ +&RA# +ǻH +w\r +\'Qk +E +W + +T +String +String +String +String +String +String +String +String +String +String +)/VC)%f9 +\0ih|;B +\0J"Z,kd +\0m"m]$35 +\00 +\0( +\0 +\0g +\0> +\0XjbW:s< +Nullable(String) +)/VC)%f9 +\0ih|;B +\0J"Z,kd +\0m"m]$35 +\00 +\0( +\0 +\0g +\0> +\0XjbW:s< +Array(String) +['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] +['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] +['','\0\\p]|]','\05','\0k$C/pnA'] +['\0ryz{*p',''] +['\07`mjt*G',''] +['\0~g'] +['\0k','\0 '] +['\0F','\0&h diff --git a/docs/ja/query_language/table_functions/generate.md b/docs/ja/query_language/table_functions/generate.md new file mode 120000 index 00000000000..de0b0a41754 --- /dev/null +++ b/docs/ja/query_language/table_functions/generate.md @@ -0,0 +1 @@ +en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/ru/query_language/table_functions/generate.md b/docs/ru/query_language/table_functions/generate.md new file mode 100644 index 00000000000..11d7f7073a9 --- /dev/null +++ b/docs/ru/query_language/table_functions/generate.md @@ -0,0 +1,37 @@ +# generate + +Генерирует случайные данные с заданной 
схемой. +Позволяет заполнять тестовые таблицы данными. +Поддерживает все типы данных, которые могут храниться в таблице, за исключением LowCardinality, AggregateFunction. + +```sql +generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); +``` + +**Входные параметры** +- `name` — название соответствующего столбца. +- `TypeName` — тип соответствующего столбца. +- `limit` — количество строк для генерации. +- `max_array_length` — максимальная длина массива для всех сгенерированных массивов. По умолчанию `10`. +- `max_string_length` — максимальная длина строки для всех генерируемых строк. По умолчанию `10`. +- `random_seed` — укажите состояние генератора случайных чисел вручную, чтобы получить стабильные результаты. По умолчанию `0` - генератор инициализируется случайным состоянием. + +**Возвращаемое значение** + +Объект таблицы с запрошенной схемой. + +## Пример + + +```sql +SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); +``` +```text +┌─a────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ +│ [77] │ -124167.6723 │ ('2061-04-17 21:59:44.573','3f72f405-ec3e-13c8-44ca-66ef335f7835') │ +│ [32,110] │ -141397.7312 │ ('1979-02-09 03:43:48.526','982486d1-5a5d-a308-e525-7bd8b80ffa73') │ +│ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │ +└──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/generate/) diff --git a/docs/zh/query_language/table_functions/generate.md b/docs/zh/query_language/table_functions/generate.md new file mode 120000 index 00000000000..de0b0a41754 --- /dev/null +++ b/docs/zh/query_language/table_functions/generate.md @@ -0,0 +1 @@ +en/query_language/table_functions/generate.md \ No newline at end of file From 
9e542f6cb0a9b0788adb86bce159827878cb8fc9 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 15:25:09 +0300 Subject: [PATCH 050/215] doc fixes --- docs/en/query_language/table_functions/generate.md | 2 +- docs/ru/query_language/table_functions/generate.md | 2 +- docs/toc_en.yml | 1 + docs/toc_fa.yml | 1 + docs/toc_ja.yml | 3 ++- docs/toc_ru.yml | 1 + docs/toc_zh.yml | 1 + 7 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/en/query_language/table_functions/generate.md b/docs/en/query_language/table_functions/generate.md index c04ebff8a1a..ed9e2150b03 100644 --- a/docs/en/query_language/table_functions/generate.md +++ b/docs/en/query_language/table_functions/generate.md @@ -2,7 +2,7 @@ Generates random data with given schema. Allows to populate test tables with data. -Supports all data types that can be stored in table except LowCardinality, AggregateFunction. +Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`. ```sql generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); diff --git a/docs/ru/query_language/table_functions/generate.md b/docs/ru/query_language/table_functions/generate.md index 11d7f7073a9..53544d16e7d 100644 --- a/docs/ru/query_language/table_functions/generate.md +++ b/docs/ru/query_language/table_functions/generate.md @@ -2,7 +2,7 @@ Генерирует случайные данные с заданной схемой. Позволяет заполнять тестовые таблицы данными. -Поддерживает все типы данных, которые могут храниться в таблице, за исключением LowCardinality, AggregateFunction. +Поддерживает все типы данных, которые могут храниться в таблице, за исключением `LowCardinality` и `AggregateFunction`. 
```sql generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 8558216b15b..76d115045e7 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -142,6 +142,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index bd1e84d590e..280d5a6f53a 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -168,6 +168,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index f47bc065890..6661300a97e 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -140,7 +140,8 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'Dictionaries': + - 'generate': 'query_language/table_functions/generate.md' +- 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': - 'General Description': 'query_language/dicts/external_dicts.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 5999ac74b56..06f196fd2f5 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -141,6 +141,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - 'Словари': - 
'Введение': 'query_language/dicts/index.md' - 'Внешние словари': diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index e85c6b50f27..c7ec13a1943 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -167,6 +167,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' + - 'generate': 'query_language/table_functions/generate.md' - '字典': - '介绍': 'query_language/dicts/index.md' - '外部字典': From 41ffe2d9f9b14dca40a554b2d95c0d628fe6b29c Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Feb 2020 16:29:08 +0300 Subject: [PATCH 051/215] style fixes --- .../src/TableFunctions/TableFunctionRandom.cpp | 18 ++++++++++++------ .../query_language/table_functions/generate.md | 1 + .../query_language/table_functions/generate.md | 2 +- .../query_language/table_functions/generate.md | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) create mode 120000 docs/fa/query_language/table_functions/generate.md diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index d70c8a73c63..fff1ed83539 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -224,12 +224,14 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { offset += 1 + static_cast(generator()) % max_string_length; offsets[i] = offset - 1; } chars.resize(offset); - for (UInt64 i = 0; i < offset; ++i) { + for (UInt64 i = 0; i < offset; ++i) + { chars[i] = 32 + generator() % 95; } // add terminating zero char @@ -250,7 +252,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); chars.resize(num_chars); - for (UInt64 i = 0; i < num_chars; 
++i) { + for (UInt64 i = 0; i < num_chars; ++i) + { chars[i] = static_cast(generator()); } } @@ -327,7 +330,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, auto & data = typeid_cast &>(column).getData(); data.resize(limit); pcg64 generator(random_seed); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { auto x = UInt128(generator(), generator()); data[i] = x; } @@ -345,7 +349,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { pcg32 generator(random_seed); offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { offset += static_cast(generator()) % max_array_length; offsets[i] = offset; } @@ -380,7 +385,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, pcg32 generator(random_seed); null_map.resize(limit); - for (UInt64 i = 0; i < limit; ++i) { + for (UInt64 i = 0; i < limit; ++i) + { null_map[i] = generator() < 1024; } break; diff --git a/docs/fa/query_language/table_functions/generate.md b/docs/fa/query_language/table_functions/generate.md new file mode 120000 index 00000000000..141c05da1e3 --- /dev/null +++ b/docs/fa/query_language/table_functions/generate.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/generate.md b/docs/ja/query_language/table_functions/generate.md index de0b0a41754..141c05da1e3 120000 --- a/docs/ja/query_language/table_functions/generate.md +++ b/docs/ja/query_language/table_functions/generate.md @@ -1 +1 @@ -en/query_language/table_functions/generate.md \ No newline at end of file +../../../en/query_language/table_functions/generate.md \ No newline at end of file diff --git a/docs/zh/query_language/table_functions/generate.md b/docs/zh/query_language/table_functions/generate.md index de0b0a41754..141c05da1e3 120000 --- 
a/docs/zh/query_language/table_functions/generate.md +++ b/docs/zh/query_language/table_functions/generate.md @@ -1 +1 @@ -en/query_language/table_functions/generate.md \ No newline at end of file +../../../en/query_language/table_functions/generate.md \ No newline at end of file From ceaff363b228532a9ad4d6e50dbd4bb79d7e575a Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Feb 2020 15:34:11 +0300 Subject: [PATCH 052/215] recanonize linux build --- .../01072_random_table_function.reference | 73 ++++++++++++++----- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference index 93ea1861756..3906b417524 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -9,6 +9,7 @@ world hello hello hello +- Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) ['world','hello','world','hello','hello','hello','world','hello','hello'] ['hello','world','world','hello','world','world'] @@ -20,6 +21,7 @@ Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) ['hello','hello'] ['hello','hello'] ['hello','world','hello','hello','world','world','world','world'] +- Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) o w @@ -31,6 +33,7 @@ h h w o +- UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 @@ -42,6 +45,7 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +- Date DateTime 
DateTime(\'Europe/Moscow\') 2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 @@ -53,6 +57,7 @@ Date DateTime DateTime(\'Europe/Moscow\') 1991-02-02 2002-01-28 12:47:02 2002-01-28 12:47:02 2056-04-25 2039-04-06 20:11:02 2039-04-06 20:11:02 2049-06-05 2053-11-20 07:10:58 2053-11-20 07:10:58 +- DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 2007-02-20 10:43:46.989 2007-02-20 10:43:46.817989 2007-02-20 10:43:46.817989 2002-10-04 02:54:48.647 2002-10-04 02:54:48.100647 2002-10-04 02:54:48.100647 @@ -64,6 +69,7 @@ DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 1979-01-20 20:39:20.939 1979-01-20 20:39:20.162939 1979-01-20 20:39:20.162939 2063-07-18 01:46:10.215 2063-07-18 01:46:10.908215 2063-07-18 01:46:10.908215 1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 +- Float32 Float64 9.783235e37 2.1973467205491123e307 9.285203e37 8.887754501811354e307 @@ -75,6 +81,7 @@ Float32 Float64 8.019563e37 7.539520705557441e307 1.7317079e38 2.143274805821858e307 2.0973474e38 2.9425818885529257e307 +- Decimal32(4) Decimal64(8) Decimal64(8) 123481.7989 22547726199.26532955 4159321346419233104838.6879832895010840 117195.7426 91200288583.97505560 8403779329565810688767.7049545291714611 @@ -86,17 +93,19 @@ Decimal32(4) Decimal64(8) Decimal64(8) 101221.1222 77365600500.27905187 -4693380431928321782727.0243506636623202 -210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 -164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 +- UUID -1f4a8fc0-63ff-735b-7e90-d9ed3e183818 -3f39171b-1263-31fa-5046-2ea9fe2fd033 -9927a60f-01ac-f065-6da8-49def100c0cc -5d736910-493d-c3bf-6b5d-c8601d6440a3 -1e857066-961d-be0e-29e7-5c9efd534f23 -bda66d4f-737b-3622-b60f-aa27fe38ff30 -623d6d82-4422-2885-297f-7b2fec54178b -dcb0e0ca-3a43-5f2e-556e-7945df65729e -678f2360-36ac-d439-8d6d-f92295887e50 -9780b53e-dc0f-4a21-bdb3-9798af1913ad 
+7e90d9ed-3e18-3818-1f4a-8fc063ff735b +50462ea9-fe2f-d033-3f39-171b126331fa +6da849de-f100-c0cc-9927-a60f01acf065 +6b5dc860-1d64-40a3-5d73-6910493dc3bf +29e75c9e-fd53-4f23-1e85-7066961dbe0e +b60faa27-fe38-ff30-bda6-6d4f737b3622 +297f7b2f-ec54-178b-623d-6d8244222885 +556e7945-df65-729e-dcb0-e0ca3a435f2e +8d6df922-9588-7e50-678f-236036acd439 +bdb39798-af19-13ad-9780-b53edc0f4a21 +- Tuple(Int32, Int64) (1234817989,2254772619926532955) (1171957426,9120028858397505560) @@ -108,6 +117,7 @@ Tuple(Int32, Int64) (1012211222,7736560050027905187) (-2109244634,2199287578947862030) (-1647742638,3019483913099890467) +- Array(Int8) [-59,-78,-25,-72,-40,-84,95,22,38] [82,65,35,-110,-57,-69] @@ -119,6 +129,7 @@ Array(Int8) [-40,84] [-104,-86] [-36,123,44,60,5,25,-5,-127] +- Array(Nullable(Int32)) [1234817989,1171957426,275100647,1033685688,180895192,135557292,716914271,1012211222,-2109244634] [-1647742638,319510849,513356835,-1966518382,-786518841,269162939] @@ -130,17 +141,19 @@ Array(Nullable(Int32)) [-1399152424,-953863084] [733724312,-23652950] [371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] +- Array(Nullable(UUID)) -['1f4a8fc0-63ff-735b-7e90-d9ed3e183818','3f39171b-1263-31fa-5046-2ea9fe2fd033','9927a60f-01ac-f065-6da8-49def100c0cc','5d736910-493d-c3bf-6b5d-c8601d6440a3','1e857066-961d-be0e-29e7-5c9efd534f23','bda66d4f-737b-3622-b60f-aa27fe38ff30','623d6d82-4422-2885-297f-7b2fec54178b','dcb0e0ca-3a43-5f2e-556e-7945df65729e','678f2360-36ac-d439-8d6d-f92295887e50'] -['9780b53e-dc0f-4a21-bdb3-9798af1913ad','c79810de-3635-d333-5ca1-7a81ab302b25','1c756bca-4438-3f17-a766-c8bcbe3ba400','d9072738-ac93-7ed6-167b-3c3c66d35a18','b1e8dec2-de29-3c9f-aaf2-f78fd92df3ce','9cd25f9f-3c0d-f43d-5a46-0194f0be04dd'] 
-['10a4718d-ab8c-49c6-c785-66ccf112f7d5','02ac2bf5-5634-a5a8-9a18-05ce8d1fb583','8037a13d-2004-08f2-f831-fa2387f5c29a','a99c4373-1121-2691-ecbb-216adbd748c7','ef0986ff-5031-0353-2f21-1de3ea53af08','778064a7-653b-ef7b-c77b-4d769b12b917','a1607e6f-691a-0ff0-b0b3-e454dae7bef7'] -['71c1b47a-c0eb-42b5-eecd-18dc585284fd','72bbf272-9ec5-09ec-f339-b5dac55c037b','26e5bce5-43f7-59b0-84c6-ef509f4c45eb','305fcbff-c366-2033-a8c5-d648f236e754','3a0d329f-f897-84e9-9e87-9501a713e63d','54bda20c-d5cd-a08a-c078-3c4fd81f4f55','43f549d1-3e5b-d5bf-ed32-b4850648bdc8','7eb6ac4f-06e0-ff48-6330-3c7afa5f2644'] -['17b9a4a5-fef8-a3f9-5af4-3b6e67ca62c9','3f524d8e-320d-00dc-c210-e199206550db'] -['005c592e-5081-9f3d-1fcb-5a9e82f39f97','29cf228d-b325-4a34-3eff-e80494a79260'] -['6c08b54b-8cf8-b96d-f087-8b54f5e72d0e'] -['7122e162-ab8b-a84a-6b71-c0846cf0204d','51c1de1a-24c7-18d6-39ed-e9023205610c'] -['f09d6779-1106-d667-e7c9-9a0cad544afe','62060fec-ee13-7c66-5da4-02c8f4d50dc9'] -['df1d0d54-d639-9c9b-2070-622fc9d82203','f23ef5b9-3797-9b0e-b8ac-67ea31b99c3e','e48afe73-9e22-7439-afed-d53b6ea204f4','d7f1ab47-4928-7623-283e-fb3f16aebeba','ea270407-d32f-a407-add2-3ae2d1113ccb','c43e9fff-2980-a1d1-f1bb-ff94d3cffbc2','a0cd54e6-0a2d-07ec-88ad-4f5d29c15b06','5e93413f-2eb9-5363-17ab-e2215b8b19e0'] +['7e90d9ed-3e18-3818-1f4a-8fc063ff735b','50462ea9-fe2f-d033-3f39-171b126331fa','6da849de-f100-c0cc-9927-a60f01acf065','6b5dc860-1d64-40a3-5d73-6910493dc3bf','29e75c9e-fd53-4f23-1e85-7066961dbe0e','b60faa27-fe38-ff30-bda6-6d4f737b3622','297f7b2f-ec54-178b-623d-6d8244222885','556e7945-df65-729e-dcb0-e0ca3a435f2e','8d6df922-9588-7e50-678f-236036acd439'] +['bdb39798-af19-13ad-9780-b53edc0f4a21','5ca17a81-ab30-2b25-c798-10de3635d333','a766c8bc-be3b-a400-1c75-6bca44383f17','167b3c3c-66d3-5a18-d907-2738ac937ed6','aaf2f78f-d92d-f3ce-b1e8-dec2de293c9f','5a460194-f0be-04dd-9cd2-5f9f3c0df43d'] 
+['c78566cc-f112-f7d5-10a4-718dab8c49c6','9a1805ce-8d1f-b583-02ac-2bf55634a5a8','f831fa23-87f5-c29a-8037-a13d200408f2','ecbb216a-dbd7-48c7-a99c-437311212691','2f211de3-ea53-af08-ef09-86ff50310353','c77b4d76-9b12-b917-7780-64a7653bef7b','b0b3e454-dae7-bef7-a160-7e6f691a0ff0'] +['eecd18dc-5852-84fd-71c1-b47ac0eb42b5','f339b5da-c55c-037b-72bb-f2729ec509ec','84c6ef50-9f4c-45eb-26e5-bce543f759b0','a8c5d648-f236-e754-305f-cbffc3662033','9e879501-a713-e63d-3a0d-329ff89784e9','c0783c4f-d81f-4f55-54bd-a20cd5cda08a','ed32b485-0648-bdc8-43f5-49d13e5bd5bf','63303c7a-fa5f-2644-7eb6-ac4f06e0ff48'] +['5af43b6e-67ca-62c9-17b9-a4a5fef8a3f9','c210e199-2065-50db-3f52-4d8e320d00dc'] +['1fcb5a9e-82f3-9f97-005c-592e50819f3d','3effe804-94a7-9260-29cf-228db3254a34'] +['f0878b54-f5e7-2d0e-6c08-b54b8cf8b96d'] +['6b71c084-6cf0-204d-7122-e162ab8ba84a','39ede902-3205-610c-51c1-de1a24c718d6'] +['e7c99a0c-ad54-4afe-f09d-67791106d667','5da402c8-f4d5-0dc9-6206-0fecee137c66'] +['2070622f-c9d8-2203-df1d-0d54d6399c9b','b8ac67ea-31b9-9c3e-f23e-f5b937979b0e','afedd53b-6ea2-04f4-e48a-fe739e227439','283efb3f-16ae-beba-d7f1-ab4749287623','add23ae2-d111-3ccb-ea27-0407d32fa407','f1bbff94-d3cf-fbc2-c43e-9fff2980a1d1','88ad4f5d-29c1-5b06-a0cd-54e60a2d07ec','17abe221-5b8b-19e0-5e93-413f2eb95363'] +- Tuple(Int32, Array(Int64)) (1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) (1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) @@ -152,6 +165,7 @@ Tuple(Int32, Array(Int64)) (1012211222,[-562393447932771686,-6225026423445182831]) (-2109244634,[-1388479317275096889,-1222297392734207149]) (-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) +- FixedString(4) Ų ج_ 
@@ -163,6 +177,7 @@ w W  T +- String String String @@ -183,6 +198,7 @@ String \0g \0> \0XjbW:s< +- Nullable(String) )/VC)%f9 \0ih|;B @@ -194,6 +210,7 @@ Nullable(String) \0g \0> \0XjbW:s< +- Array(String) ['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] ['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] @@ -205,3 +222,19 @@ Array(String) ['\0k','\0 '] ['\0F','\0&h -210924.4634 w 2.143274805821858e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','8d6df922-9588-7e50-678f-236036acd439') w +[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 2.9425818885529257e307 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','bdb39798-af19-13ad-9780-b53edc0f4a21') \r +- From e2688e030d9a1f84f5caa4a14f0a8cd43ed0c2db Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 11 Feb 2020 12:53:54 +0300 Subject: [PATCH 053/215] fix indufficient arguments parsing --- dbms/src/TableFunctions/TableFunctionRandom.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index fff1ed83539..a08dbe6b691 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -409,6 +409,11 @@ StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const C ASTs & args = args_func.at(0)->children; + if (args.size() < 2) + throw Exception("Table function '" + getName() + "' requires at least two arguments: "\ + " structure, limit(, max_array_length, max_string_length, random_seed).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (args.size() > 5) throw Exception("Table function '" + getName() + "' requires at most five arguments: "\ " structure, limit, max_array_length, max_string_length, random_seed.", From a7ec7a6c4ddfde0a2be304bbdd097df434aa6446 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya 
<2159081+qoega@users.noreply.github.com> Date: Tue, 11 Feb 2020 17:05:51 +0300 Subject: [PATCH 054/215] pcg64 -> pcg64_oneseq --- .../TableFunctions/TableFunctionRandom.cpp | 13 +- .../01072_random_table_function.reference | 146 +++++++++--------- .../01072_random_table_function.sql | 84 +++++----- 3 files changed, 122 insertions(+), 121 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index a08dbe6b691..f23b98bfbaa 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -92,7 +92,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -138,7 +138,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -164,7 +164,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); double d = 1.0; for (UInt64 i = 0; i < limit; ++i) { @@ -306,7 +306,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -317,7 +317,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); 
data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { Int128 x = static_cast(generator()) << 64 | static_cast(generator()); @@ -329,7 +329,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64 generator(random_seed); + pcg64_oneseq generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { auto x = UInt128(generator(), generator()); @@ -467,3 +467,4 @@ void registerTableFunctionRandom(TableFunctionFactory & factory) } } + diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference index 3906b417524..2770d1fcaf3 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -1,3 +1,15 @@ +UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 +2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 +9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 +4555697903102013946 4555697903102013946 275100647 275100647 46055 -19481 231 -25 +5784362079052877875 5784362079052877875 1033685688 1033685688 51896 -13640 184 -72 +11035971995277520997 -7410772078432030619 180895192 180895192 15832 15832 216 -40 +7901646768096461004 7901646768096461004 135557292 135557292 28844 28844 172 -84 +6733841386518201279 6733841386518201279 716914271 716914271 15967 15967 95 95 +7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 +2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 +3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +- Enum8(\'hello\' = 1, \'world\' = 5) world hello @@ -34,18 +46,6 @@ h w o - -UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -2254772619926532955 
2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 -9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 -4555697903102013946 4555697903102013946 275100647 275100647 46055 -19481 231 -25 -5784362079052877875 5784362079052877875 1033685688 1033685688 51896 -13640 184 -72 -11035971995277520997 -7410772078432030619 180895192 180895192 15832 15832 216 -40 -7901646768096461004 7901646768096461004 135557292 135557292 28844 28844 172 -84 -6733841386518201279 6733841386518201279 716914271 716914271 15967 15967 95 95 -7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 -2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 31526 38 38 -3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 -- Date DateTime DateTime(\'Europe/Moscow\') 2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 @@ -94,18 +94,6 @@ Decimal32(4) Decimal64(8) Decimal64(8) -210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 -164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 - -UUID -7e90d9ed-3e18-3818-1f4a-8fc063ff735b -50462ea9-fe2f-d033-3f39-171b126331fa -6da849de-f100-c0cc-9927-a60f01acf065 -6b5dc860-1d64-40a3-5d73-6910493dc3bf -29e75c9e-fd53-4f23-1e85-7066961dbe0e -b60faa27-fe38-ff30-bda6-6d4f737b3622 -297f7b2f-ec54-178b-623d-6d8244222885 -556e7945-df65-729e-dcb0-e0ca3a435f2e -8d6df922-9588-7e50-678f-236036acd439 -bdb39798-af19-13ad-9780-b53edc0f4a21 -- Tuple(Int32, Int64) (1234817989,2254772619926532955) (1171957426,9120028858397505560) @@ -142,18 +130,6 @@ Array(Nullable(Int32)) [733724312,-23652950] [371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] - -Array(Nullable(UUID)) 
-['7e90d9ed-3e18-3818-1f4a-8fc063ff735b','50462ea9-fe2f-d033-3f39-171b126331fa','6da849de-f100-c0cc-9927-a60f01acf065','6b5dc860-1d64-40a3-5d73-6910493dc3bf','29e75c9e-fd53-4f23-1e85-7066961dbe0e','b60faa27-fe38-ff30-bda6-6d4f737b3622','297f7b2f-ec54-178b-623d-6d8244222885','556e7945-df65-729e-dcb0-e0ca3a435f2e','8d6df922-9588-7e50-678f-236036acd439'] -['bdb39798-af19-13ad-9780-b53edc0f4a21','5ca17a81-ab30-2b25-c798-10de3635d333','a766c8bc-be3b-a400-1c75-6bca44383f17','167b3c3c-66d3-5a18-d907-2738ac937ed6','aaf2f78f-d92d-f3ce-b1e8-dec2de293c9f','5a460194-f0be-04dd-9cd2-5f9f3c0df43d'] -['c78566cc-f112-f7d5-10a4-718dab8c49c6','9a1805ce-8d1f-b583-02ac-2bf55634a5a8','f831fa23-87f5-c29a-8037-a13d200408f2','ecbb216a-dbd7-48c7-a99c-437311212691','2f211de3-ea53-af08-ef09-86ff50310353','c77b4d76-9b12-b917-7780-64a7653bef7b','b0b3e454-dae7-bef7-a160-7e6f691a0ff0'] -['eecd18dc-5852-84fd-71c1-b47ac0eb42b5','f339b5da-c55c-037b-72bb-f2729ec509ec','84c6ef50-9f4c-45eb-26e5-bce543f759b0','a8c5d648-f236-e754-305f-cbffc3662033','9e879501-a713-e63d-3a0d-329ff89784e9','c0783c4f-d81f-4f55-54bd-a20cd5cda08a','ed32b485-0648-bdc8-43f5-49d13e5bd5bf','63303c7a-fa5f-2644-7eb6-ac4f06e0ff48'] -['5af43b6e-67ca-62c9-17b9-a4a5fef8a3f9','c210e199-2065-50db-3f52-4d8e320d00dc'] -['1fcb5a9e-82f3-9f97-005c-592e50819f3d','3effe804-94a7-9260-29cf-228db3254a34'] -['f0878b54-f5e7-2d0e-6c08-b54b8cf8b96d'] -['6b71c084-6cf0-204d-7122-e162ab8ba84a','39ede902-3205-610c-51c1-de1a24c718d6'] -['e7c99a0c-ad54-4afe-f09d-67791106d667','5da402c8-f4d5-0dc9-6206-0fecee137c66'] -['2070622f-c9d8-2203-df1d-0d54d6399c9b','b8ac67ea-31b9-9c3e-f23e-f5b937979b0e','afedd53b-6ea2-04f4-e48a-fe739e227439','283efb3f-16ae-beba-d7f1-ab4749287623','add23ae2-d111-3ccb-ea27-0407d32fa407','f1bbff94-d3cf-fbc2-c43e-9fff2980a1d1','88ad4f5d-29c1-5b06-a0cd-54e60a2d07ec','17abe221-5b8b-19e0-5e93-413f2eb95363'] -- Tuple(Int32, Array(Int64)) 
(1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) (1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) @@ -166,6 +142,54 @@ Tuple(Int32, Array(Int64)) (-2109244634,[-1388479317275096889,-1222297392734207149]) (-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) - +Nullable(String) +)/VC)%f9 +\0ih|;B +\0J"Z,kd +\0m"m]$35 +\00 +\0( +\0 +\0g +\0> +\0XjbW:s< +- +Array(String) +['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] +['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] +['','\0\\p]|]','\05','\0k$C/pnA'] +['\0ryz{*p',''] +['\07`mjt*G',''] +['\0~g'] +['\0k','\0 '] +['\0F','\0&h \0XjbW:s< - -Nullable(String) -)/VC)%f9 -\0ih|;B -\0J"Z,kd -\0m"m]$35 -\00 -\0( -\0 -\0g -\0> -\0XjbW:s< +[77] -124167.6723 ('2061-04-17 21:59:44.573','3f72f405-ec3e-13c8-44ca-66ef335f7835') +[32,110] -141397.7312 ('1979-02-09 03:43:48.526','982486d1-5a5d-a308-e525-7bd8b80ffa73') +[68] -67417.0770 ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') - -Array(String) -['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] -['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] -['','\0\\p]|]','\05','\0k$C/pnA'] -['\0ryz{*p',''] -['\07`mjt*G',''] -['\0~g'] -['\0k','\0 '] -['\0F','\0&h -210924.4634 w 2.143274805821858e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','8d6df922-9588-7e50-678f-236036acd439') w -[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 2.9425818885529257e307 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','bdb39798-af19-13ad-9780-b53edc0f4a21') \r +[-59,-78,-25,-72,-40,-84,95,22,38] 
1234817989 )/VC)%f9 123481.7989 o 2.1973467205491123e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','1f4a8fc0-63ff-735b-7e90-d9ed3e183818') Ų +[82,65,35,-110,-57,-69] 1171957426 \0ih|;B 117195.7426 w 8.887754501811354e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','3f39171b-1263-31fa-5046-2ea9fe2fd033') +[72,119,-78,-58,13,39,-71] 275100647 \0J"Z,kd 27510.0647 w 4.4396706606805647e307 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','9927a60f-01ac-f065-6da8-49def100c0cc') ج +[81,107,-11,-63,-59,69,-80,-122] 1033685688 \0m"m]$35 103368.5688 w 5.637042481600483e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','5d736910-493d-c3bf-6b5d-c8601d6440a3') _ +[87,-76] 180895192 \00 18089.5192 h 1.07549012514996e308 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','1e857066-961d-be0e-29e7-5c9efd534f23') &R +[22,-84] 135557292 \0( 13555.7292 w 7.700402896226395e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','bda66d4f-737b-3622-b60f-aa27fe38ff30') A# +[-45] 716914271 \0 71691.4271 h 6.562339881458101e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','623d6d82-4422-2885-297f-7b2fec54178b') +[-40,84] 1012211222 \0g 101221.1222 h 7.539520705557441e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','dcb0e0ca-3a43-5f2e-556e-7945df65729e') H +[-104,-86] 2185722662 \0> -210924.4634 w 2.143274805821858e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','678f2360-36ac-d439-8d6d-f92295887e50') w +[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 2.9425818885529257e307 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','9780b53e-dc0f-4a21-bdb3-9798af1913ad') \r - diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index ea80c38df66..dc212a7b8ff 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ 
b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -1,3 +1,16 @@ +SELECT + toTypeName(ui64), toTypeName(i64), + toTypeName(ui32), toTypeName(i32), + toTypeName(ui16), toTypeName(i16), + toTypeName(ui8), toTypeName(i8) +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1); +SELECT + ui64, i64, + ui32, i32, + ui16, i16, + ui8, i8 +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 10, 1); +SELECT '-'; SELECT toTypeName(i) FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 1); @@ -20,19 +33,6 @@ SELECT FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 10, 1); SELECT '-'; SELECT -toTypeName(ui64), toTypeName(i64), -toTypeName(ui32), toTypeName(i32), -toTypeName(ui16), toTypeName(i16), -toTypeName(ui8), toTypeName(i8) -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1); -SELECT -ui64, i64, -ui32, i32, -ui16, i16, -ui8, i8 -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 10, 1); -SELECT '-'; -SELECT toTypeName(d), toTypeName(dt), toTypeName(dtm) FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 1); SELECT @@ -60,13 +60,6 @@ SELECT d32, d64, d128 FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 10, 1); SELECT '-'; -SELECT - toTypeName(i) -FROM generate('i UUID', 1); -SELECT - i -FROM generate('i UUID', 10, 10, 10, 1); -SELECT '-'; SELECT toTypeName(i) FROM generate('i Tuple(Int32, Int64)', 1); @@ -88,13 +81,6 @@ SELECT i FROM generate('i Array(Nullable(Int32))', 10, 10, 10, 1); SELECT '-'; -SELECT - toTypeName(i) -FROM generate('i Array(Nullable(UUID))', 1); -SELECT - i -FROM generate('i Array(Nullable(UUID))', 10, 10, 10, 1); -SELECT '-'; SELECT toTypeName(i) FROM generate('i Tuple(Int32, Array(Int64))', 1); @@ -102,20 
+88,6 @@ SELECT i FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 10, 1); SELECT '-'; -SELECT - toTypeName(i) -FROM generate('i FixedString(4)', 1); -SELECT - i -FROM generate('i FixedString(4)', 10, 10, 10, 1); -SELECT '-'; -SELECT - toTypeName(i) -FROM generate('i String', 10); -SELECT - i -FROM generate('i String', 10, 10, 10, 1); -SELECT '-'; SELECT toTypeName(i) FROM generate('i Nullable(String)', 1); @@ -131,7 +103,34 @@ SELECT FROM generate('i Array(String)', 10, 10, 10, 1); SELECT '-'; - +SELECT + toTypeName(i) +FROM generate('i UUID', 1); +SELECT + i +FROM generate('i UUID', 10, 10, 10, 1); +SELECT '-'; +SELECT + toTypeName(i) +FROM generate('i Array(Nullable(UUID))', 1); +SELECT + i +FROM generate('i Array(Nullable(UUID))', 10, 10, 10, 1); +SELECT '-'; +SELECT + toTypeName(i) +FROM generate('i FixedString(4)', 1); +SELECT + i +FROM generate('i FixedString(4)', 10, 10, 10, 1); +SELECT '-'; +SELECT + toTypeName(i) +FROM generate('i String', 10); +SELECT + i +FROM generate('i String', 10, 10, 10, 1); +SELECT '-'; DROP TABLE IF EXISTS test_table; CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Memory; INSERT INTO test_table SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); @@ -150,3 +149,4 @@ SELECT * FROM test_table_2; SELECT '-'; DROP TABLE IF EXISTS test_table_2; + From 647994fea2b28249139b3ed96574026e6563f3cf Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 11 Feb 2020 23:32:45 +0300 Subject: [PATCH 055/215] pcg64_oneseq -> pcg32 --- .../TableFunctions/TableFunctionRandom.cpp | 31 ++-- .../01072_random_table_function.reference | 166 +++++++++--------- 2 files changed, 102 insertions(+), 95 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/TableFunctions/TableFunctionRandom.cpp index f23b98bfbaa..1a6c6bc5e67 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ 
b/dbms/src/TableFunctions/TableFunctionRandom.cpp @@ -92,10 +92,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - data[i] = static_cast(generator()); + UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); + data[i] = static_cast(a); } break; } @@ -138,10 +139,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - data[i] = static_cast(generator()); + Int64 a = static_cast(generator()) << 32 | static_cast(generator()); + data[i] = static_cast(a); } break; } @@ -164,12 +166,12 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); double d = 1.0; for (UInt64 i = 0; i < limit; ++i) { d = std::numeric_limits::max(); - data[i] = (d / pcg64::max()) * generator(); + data[i] = (d / pcg32::max()) * generator(); } break; } @@ -306,10 +308,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - data[i] = static_cast(generator()); + UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); + data[i] = a; } break; } @@ -317,10 +320,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); for 
(UInt64 i = 0; i < limit; ++i) { - Int128 x = static_cast(generator()) << 64 | static_cast(generator()); + Int128 x = static_cast(generator()) << 96 | static_cast(generator()) << 32 | + static_cast(generator()) << 64 | static_cast(generator()); data[i] = x; } } @@ -329,10 +333,12 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_oneseq generator(random_seed); + pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - auto x = UInt128(generator(), generator()); + UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); + UInt64 b = static_cast(generator()) << 32 | static_cast(generator()); + auto x = UInt128(a, b); data[i] = x; } } @@ -468,3 +474,4 @@ void registerTableFunctionRandom(TableFunctionFactory & factory) } + diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference index 2770d1fcaf3..3111e897928 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -1,14 +1,14 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -2254772619926532955 2254772619926532955 1234817989 1234817989 54213 -11323 197 -59 -9120028858397505560 9120028858397505560 1171957426 1171957426 42674 -22862 178 -78 -4555697903102013946 4555697903102013946 275100647 275100647 46055 -19481 231 -25 -5784362079052877875 5784362079052877875 1033685688 1033685688 51896 -13640 184 -72 -11035971995277520997 -7410772078432030619 180895192 180895192 15832 15832 216 -40 -7901646768096461004 7901646768096461004 135557292 135557292 28844 28844 172 -84 -6733841386518201279 6733841386518201279 716914271 716914271 15967 15967 95 95 -7736560050027905187 7736560050027905187 1012211222 1012211222 7702 7702 22 22 -2199287578947862030 2199287578947862030 2185722662 -2109244634 31526 
31526 38 38 -3019483913099890467 3019483913099890467 2647224658 -1647742638 29010 29010 82 82 +5303502880439445170 5303502880439445170 1234817989 1234817989 54213 -11323 197 -59 +1181548283007126200 1181548283007126200 1171957426 1171957426 42674 -22862 178 -78 +776938933779198124 776938933779198124 275100647 275100647 46055 -19481 231 -25 +3079123348992892438 3079123348992892438 1033685688 1033685688 51896 -13640 184 -72 +9387607354063286610 -9059136719646265006 180895192 180895192 15832 15832 216 -40 +1372288647685551139 1372288647685551139 135557292 135557292 28844 28844 172 -84 +10000611939545164999 -8446132134164386617 716914271 716914271 15967 15967 95 95 +1156046020585944904 1156046020585944904 1012211222 1012211222 7702 7702 22 22 +8349022212802674610 8349022212802674610 2185722662 -2109244634 31526 31526 38 38 +6331568246612505101 6331568246612505101 2647224658 -1647742638 29010 29010 82 82 - Enum8(\'hello\' = 1, \'world\' = 5) world @@ -71,40 +71,40 @@ DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 - Float32 Float64 -9.783235e37 2.1973467205491123e307 -9.285203e37 8.887754501811354e307 -2.1795718e37 4.4396706606805647e307 -8.1897013e37 5.637042481600483e307 -1.4331993e37 1.07549012514996e308 -1.0739954e37 7.700402896226395e307 -5.67998e37 6.562339881458101e307 -8.019563e37 7.539520705557441e307 -1.7317079e38 2.143274805821858e307 -2.0973474e38 2.9425818885529257e307 +9.783235e37 5.168430093085938e307 +9.285203e37 4.905322146512668e307 +2.1795718e37 1.151455903014231e307 +8.1897013e37 4.326574656543525e307 +1.4331993e37 7.571513877802428e306 +1.0739954e37 5.673859577292225e306 +5.67998e37 3.000702391289156e307 +8.019563e37 4.236691550453344e307 +1.7317079e38 9.148518147657713e307 +2.0973474e38 1.108017190180919e308 - Decimal32(4) Decimal64(8) Decimal64(8) -123481.7989 22547726199.26532955 4159321346419233104838.6879832895010840 -117195.7426 91200288583.97505560 
8403779329565810688767.7049545291714611 -27510.0647 45556979031.02013946 -13670461591942827725055.0250490776469300 -103368.5688 57843620790.52877875 12421744869005473959544.2499747955622051 -18089.5192 -74107720784.32030619 4056969511333950153663.4915186231430947 -13555.7292 79016467680.96461004 -8819413736166121578589.4583420666183888 -71691.4271 67338413865.18201279 13058329479868658041313.8432372419860363 -101221.1222 77365600500.27905187 -4693380431928321782727.0243506636623202 --210924.4634 21992875789.47862030 13765369952377767241248.9441272127848016 --164774.2638 30194839130.99890467 -13890064946313418575619.0315227826809939 +123481.7989 53035028804.39445170 9783236031310378439643.1472294664915640 +117195.7426 11815482830.07126200 1433199368304978416824.6218455877230102 +27510.0647 7769389337.79198124 -16711157663899806765510.0649541344873437 +103368.5688 30791233489.92892438 -15580363795141897721982.8655941091887288 +18089.5192 -90591367196.46265006 15401227599802737909025.7109101500379661 +13555.7292 13722886476.85551139 7984757011464664209957.7603879176325739 +71691.4271 -84461321341.64386617 5816721179226388864892.0657225650146885 +101221.1222 11560460205.85944904 -5775130779420200933943.2970518536587084 +-210924.4634 83490222128.02674610 -5061941637011485215525.6417334537314088 +-164774.2638 63315682466.12505101 -7557281935671753244601.7559777699677732 - Tuple(Int32, Int64) -(1234817989,2254772619926532955) -(1171957426,9120028858397505560) -(275100647,4555697903102013946) -(1033685688,5784362079052877875) -(180895192,-7410772078432030619) -(135557292,7901646768096461004) -(716914271,6733841386518201279) -(1012211222,7736560050027905187) -(-2109244634,2199287578947862030) -(-1647742638,3019483913099890467) +(1234817989,5303502880439445170) +(1171957426,1181548283007126200) +(275100647,776938933779198124) +(1033685688,3079123348992892438) +(180895192,-9059136719646265006) +(135557292,1372288647685551139) +(716914271,-8446132134164386617) 
+(1012211222,1156046020585944904) +(-2109244634,8349022212802674610) +(-1647742638,6331568246612505101) - Array(Int8) [-59,-78,-25,-72,-40,-84,95,22,38] @@ -131,16 +131,16 @@ Array(Nullable(Int32)) [371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] - Tuple(Int32, Array(Int64)) -(1234817989,[2254772619926532955,9120028858397505560,4555697903102013946,5784362079052877875,-7410772078432030619,7901646768096461004,6733841386518201279,7736560050027905187,2199287578947862030]) -(1171957426,[3019483913099890467,-4781013766399904222,-5327852745410412752,7078934595552553093,2990244123355912075,-2544286630298820818]) -(275100647,[6155991081669718686,7462222003717329977,-8255668614967296432,-7529819295378879967,-4777308097681484883,-4064480117123591373,6674750820081216293]) -(1033685688,[2050663721809231639,-6384194708780112896,-2808232718275215658,1619954721090656792,-5627002805867168609,-6128563945701772338,-7146544521171569603,6504888450989032669]) -(180895192,[1199208254069819846,-4069733657855461419]) -(135557292,[192577216783361448,-7343112807738526333]) -(716914271,[-9207713629233477390]) -(1012211222,[-562393447932771686,-6225026423445182831]) -(-2109244634,[-1388479317275096889,-1222297392734207149]) -(-1647742638,[3396028458740199176,8610993157653131131,-4072576266223306473,-6818310818869145616,-5713972449102020873,8197031236106666677,-1239306987803343619,8267468115072584172]) +(1234817989,[5303502880439445170,1181548283007126200,776938933779198124,3079123348992892438,-9059136719646265006,1372288647685551139,-8446132134164386617,1156046020585944904,8349022212802674610]) +(1171957426,[6331568246612505101,4328545451735172025,837735849651948139,3153250869148769217,-7783757525582213563,-3130704671775162746]) +(275100647,[-1986779751559399244,-2744084058313283668,1626824019940122840,-4096810749907976552,-101588646332188196,1984785042821053740,-4532839778647840507]) 
+(1033685688,[1064130551002268155,3531365820900360285,-6430432825062036096,673694157720662742,1098480112755237234,-5582206543758520198,1822983700066063177,-3760535601444222426]) +(180895192,[7337695457286344221,5872187140729212089]) +(135557292,[5142382871975234625,-3506363600639473930]) +(716914271,[2213685469994781810]) +(1012211222,[-5533057085551498458,1093259139288543534]) +(-2109244634,[2385587103426775499,6630437694997012859]) +(-1647742638,[-8881439430624548159,6128668282898355635,5437491183672208077,3669750475264598009,1788253815182834821,3436179822464488712,6060518060841781539,5049334787152748055]) - Nullable(String) )/VC)%f9 @@ -167,28 +167,28 @@ Array(String) ['\0n3;','\0bX(o2]uC','\0up_X\'','\0s','\05j|iS,','\0','\0(y.aRsVz','\0T:64 ]'] - UUID -1f4a8fc0-63ff-735b-7e90-d9ed3e183818 -3f39171b-1263-31fa-5046-2ea9fe2fd033 -9927a60f-01ac-f065-6da8-49def100c0cc -5d736910-493d-c3bf-6b5d-c8601d6440a3 -1e857066-961d-be0e-29e7-5c9efd534f23 -bda66d4f-737b-3622-b60f-aa27fe38ff30 -623d6d82-4422-2885-297f-7b2fec54178b -dcb0e0ca-3a43-5f2e-556e-7945df65729e -678f2360-36ac-d439-8d6d-f92295887e50 -9780b53e-dc0f-4a21-bdb3-9798af1913ad +4999d3c5-45da-a6b2-1065-b3e73d9ccab8 +0ac83dd8-0814-70ac-2abb-3e5f3c551e16 +82477b26-9dc9-7152-130b-59411e993423 +8ac94f92-d11e-acc7-100b-19bb11077748 +73ddaf77-aff2-ffb2-57de-3fc6327b320d +3c121527-9cce-c3b9-0ba0-3c51161d4a6b +2bc298f5-774d-dbc1-93fa-89c545e60e45 +d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4 +d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8 +c7253454-2bbb-be98-fe97-15aa162839dc - Array(Nullable(UUID)) -['1f4a8fc0-63ff-735b-7e90-d9ed3e183818','3f39171b-1263-31fa-5046-2ea9fe2fd033','9927a60f-01ac-f065-6da8-49def100c0cc','5d736910-493d-c3bf-6b5d-c8601d6440a3','1e857066-961d-be0e-29e7-5c9efd534f23','bda66d4f-737b-3622-b60f-aa27fe38ff30','623d6d82-4422-2885-297f-7b2fec54178b','dcb0e0ca-3a43-5f2e-556e-7945df65729e','678f2360-36ac-d439-8d6d-f92295887e50'] 
-['9780b53e-dc0f-4a21-bdb3-9798af1913ad','c79810de-3635-d333-5ca1-7a81ab302b25','1c756bca-4438-3f17-a766-c8bcbe3ba400','d9072738-ac93-7ed6-167b-3c3c66d35a18','b1e8dec2-de29-3c9f-aaf2-f78fd92df3ce','9cd25f9f-3c0d-f43d-5a46-0194f0be04dd'] -['10a4718d-ab8c-49c6-c785-66ccf112f7d5','02ac2bf5-5634-a5a8-9a18-05ce8d1fb583','8037a13d-2004-08f2-f831-fa2387f5c29a','a99c4373-1121-2691-ecbb-216adbd748c7','ef0986ff-5031-0353-2f21-1de3ea53af08','778064a7-653b-ef7b-c77b-4d769b12b917','a1607e6f-691a-0ff0-b0b3-e454dae7bef7'] -['71c1b47a-c0eb-42b5-eecd-18dc585284fd','72bbf272-9ec5-09ec-f339-b5dac55c037b','26e5bce5-43f7-59b0-84c6-ef509f4c45eb','305fcbff-c366-2033-a8c5-d648f236e754','3a0d329f-f897-84e9-9e87-9501a713e63d','54bda20c-d5cd-a08a-c078-3c4fd81f4f55','43f549d1-3e5b-d5bf-ed32-b4850648bdc8','7eb6ac4f-06e0-ff48-6330-3c7afa5f2644'] -['17b9a4a5-fef8-a3f9-5af4-3b6e67ca62c9','3f524d8e-320d-00dc-c210-e199206550db'] -['005c592e-5081-9f3d-1fcb-5a9e82f39f97','29cf228d-b325-4a34-3eff-e80494a79260'] -['6c08b54b-8cf8-b96d-f087-8b54f5e72d0e'] -['7122e162-ab8b-a84a-6b71-c0846cf0204d','51c1de1a-24c7-18d6-39ed-e9023205610c'] -['f09d6779-1106-d667-e7c9-9a0cad544afe','62060fec-ee13-7c66-5da4-02c8f4d50dc9'] -['df1d0d54-d639-9c9b-2070-622fc9d82203','f23ef5b9-3797-9b0e-b8ac-67ea31b99c3e','e48afe73-9e22-7439-afed-d53b6ea204f4','d7f1ab47-4928-7623-283e-fb3f16aebeba','ea270407-d32f-a407-add2-3ae2d1113ccb','c43e9fff-2980-a1d1-f1bb-ff94d3cffbc2','a0cd54e6-0a2d-07ec-88ad-4f5d29c15b06','5e93413f-2eb9-5363-17ab-e2215b8b19e0'] +['4999d3c5-45da-a6b2-1065-b3e73d9ccab8','0ac83dd8-0814-70ac-2abb-3e5f3c551e16','82477b26-9dc9-7152-130b-59411e993423','8ac94f92-d11e-acc7-100b-19bb11077748','73ddaf77-aff2-ffb2-57de-3fc6327b320d','3c121527-9cce-c3b9-0ba0-3c51161d4a6b','2bc298f5-774d-dbc1-93fa-89c545e60e45','d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4','d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8'] 
+['c7253454-2bbb-be98-fe97-15aa162839dc','1b8b5f7b-08db-7d2c-c118-1e3cc1ef6905','0ec48d19-1f21-81fb-3101-ee810eb8c05d','a6c2836a-5e2a-6d80-0959-71445e3232d6','0f3e95d7-b19b-6172-b288-04b96869907a','194c8a04-41c3-d349-cbcf-e4cb6eb50a26'] +['65d4bb24-418c-ce1d-517e-3317a1c80cb9','475d69f9-ff23-0c41-cf56-e4e1fa8646f6','1eb89734-d37b-8c72-b336-a1e565beab26','0f2c0965-14be-312e-211b-4ed4c88189cb','5c040bf4-3ba8-217b-84be-c9e4cb79eac1','550d6755-0c5c-15b3-4b75-d96c2819aacd','32ed929d-3477-9ff9-18d1-275ece35bc85'] +['2fafc357-c65c-5508-541b-4910fb504923','4612d73c-8443-1e17-1626-c13c17ccc201','2223f59a-b8f4-6b52-f13a-4c1e8c23aacc','d788aee1-23f3-570e-4f3b-130d38585201','6afb1f5e-3a1c-1610-a14c-e746b9618304','b003bd9a-5c3a-4132-9928-bc27b6b51628','c520f34b-e6e2-d2dd-e584-1a6eba57b77e','dbb4f033-dd30-090b-0b4a-a0ca4ec23e69'] +['725baf45-9f31-35bd-1019-914968c49767','8a96d309-cf7c-a95f-461a-e7764a64bccf'] +['8f12d466-baa6-d680-c17c-f399935f6b62','bb22e0c8-0ec2-15b1-3250-421c9806fd49'] +['b7013645-4614-d52e-c59c-65ed621e8414'] +['d4ed7d7f-8b1f-6991-74f7-cb187d47628b','51a3ec18-4967-32ae-d38f-656770d52224'] +['040cb3cc-a02d-58f9-7fc5-6bb0366b7366','f4bb3a59-ec6a-9c74-2f11-992c5b6d4b05'] +['8a9514e5-d533-7930-eb5a-6dd256a041c0','150b4bbf-f6a8-52d2-ec0d-a81401100801','a8ad6b08-d188-00a1-e4e0-616b324a4348','c209640a-5bc6-3e77-4556-30775457b60f','a482190e-eaae-f6d0-d1da-0915d03ccc02','16fbefc0-3eb7-7dec-6853-1557fa2fb964','67c6e040-dab2-56f9-0694-e9f57fad2000','66fcaeaf-4a5b-476a-cb8d-f9dd9ec16abe'] - FixedString(4) Ų @@ -223,18 +223,18 @@ String \0> \0XjbW:s< - -[77] -124167.6723 ('2061-04-17 21:59:44.573','3f72f405-ec3e-13c8-44ca-66ef335f7835') -[32,110] -141397.7312 ('1979-02-09 03:43:48.526','982486d1-5a5d-a308-e525-7bd8b80ffa73') -[68] -67417.0770 ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') +[77] -124167.6723 ('2061-04-17 21:59:44.573','b5fd844d-abb8-6b20-d7d0-f86e1120e744') +[32,110] -141397.7312 ('1979-02-09 
03:43:48.526','bce332bd-cf45-f2cb-7da5-ddf9370fb1c7') +[68] -67417.0770 ('2080-03-12 14:17:31.269','bebf8e52-1ceb-73c7-2ead-f1eed124f71d') - -[-59,-78,-25,-72,-40,-84,95,22,38] 1234817989 )/VC)%f9 123481.7989 o 2.1973467205491123e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','1f4a8fc0-63ff-735b-7e90-d9ed3e183818') Ų -[82,65,35,-110,-57,-69] 1171957426 \0ih|;B 117195.7426 w 8.887754501811354e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','3f39171b-1263-31fa-5046-2ea9fe2fd033') -[72,119,-78,-58,13,39,-71] 275100647 \0J"Z,kd 27510.0647 w 4.4396706606805647e307 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','9927a60f-01ac-f065-6da8-49def100c0cc') ج -[81,107,-11,-63,-59,69,-80,-122] 1033685688 \0m"m]$35 103368.5688 w 5.637042481600483e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','5d736910-493d-c3bf-6b5d-c8601d6440a3') _ -[87,-76] 180895192 \00 18089.5192 h 1.07549012514996e308 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','1e857066-961d-be0e-29e7-5c9efd534f23') &R -[22,-84] 135557292 \0( 13555.7292 w 7.700402896226395e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','bda66d4f-737b-3622-b60f-aa27fe38ff30') A# -[-45] 716914271 \0 71691.4271 h 6.562339881458101e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','623d6d82-4422-2885-297f-7b2fec54178b') -[-40,84] 1012211222 \0g 101221.1222 h 7.539520705557441e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','dcb0e0ca-3a43-5f2e-556e-7945df65729e') H -[-104,-86] 2185722662 \0> -210924.4634 w 2.143274805821858e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','678f2360-36ac-d439-8d6d-f92295887e50') w -[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 2.9425818885529257e307 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','9780b53e-dc0f-4a21-bdb3-9798af1913ad') \r +[-59,-78,-25,-72,-40,-84,95,22,38] 1234817989 )/VC)%f9 123481.7989 o 
5.168430093085938e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4999d3c5-45da-a6b2-1065-b3e73d9ccab8') Ų +[82,65,35,-110,-57,-69] 1171957426 \0ih|;B 117195.7426 w 4.905322146512668e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','0ac83dd8-0814-70ac-2abb-3e5f3c551e16') +[72,119,-78,-58,13,39,-71] 275100647 \0J"Z,kd 27510.0647 w 1.151455903014231e307 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','82477b26-9dc9-7152-130b-59411e993423') ج +[81,107,-11,-63,-59,69,-80,-122] 1033685688 \0m"m]$35 103368.5688 w 4.326574656543525e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','8ac94f92-d11e-acc7-100b-19bb11077748') _ +[87,-76] 180895192 \00 18089.5192 h 7.571513877802428e306 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','73ddaf77-aff2-ffb2-57de-3fc6327b320d') &R +[22,-84] 135557292 \0( 13555.7292 w 5.673859577292225e306 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','3c121527-9cce-c3b9-0ba0-3c51161d4a6b') A# +[-45] 716914271 \0 71691.4271 h 3.000702391289156e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','2bc298f5-774d-dbc1-93fa-89c545e60e45') +[-40,84] 1012211222 \0g 101221.1222 h 4.236691550453344e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4') H +[-104,-86] 2185722662 \0> -210924.4634 w 9.148518147657713e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8') w +[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 1.108017190180919e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','c7253454-2bbb-be98-fe97-15aa162839dc') \r - From 1de74019a1968cc0d5768fd8e7851fdfc459c1d1 Mon Sep 17 00:00:00 2001 From: TAC Date: Wed, 19 Feb 2020 13:28:46 +0000 Subject: [PATCH 056/215] Translate history.md via GitLocalize --- docs/ja/introduction/history.md | 47 ++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 
deletion(-) diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md index 7004e990a59..fbae277542d 120000 --- a/docs/ja/introduction/history.md +++ b/docs/ja/introduction/history.md @@ -1 +1,46 @@ -../../en/introduction/history.md \ No newline at end of file +# ClickHouseの歴史 + +ClickHouseは元々、 [世界で2番目に大きなWeb分析プラットフォーム ](http://w3techs.com/technologies/overview/traffic_analysis/all) である [Yandex.Metrica](https://metrica.yandex.com/)を強化するために開発されたもので、このシステムのコアコンポーネントであり続けています。データベースには13兆を超えるレコードがあり、毎日200億を超えるイベントが発生しますが、ClickHouseでは集計されていないデータから直接カスタムレポートを生成できます。この記事では、ClickHouseの開発の初期段階におけるClickHouseの目標について簡単に説明します。 + +Yandex.Metricaは、ユーザーが定義した任意のセグメントを使用して、ヒットとセッションに基づいてカスタマイズされたレポートをその都度作成します。これには、多くの場合、一意のユーザー数などの複雑な集計を作成する必要があり、レポートを作成するための新しいデータがリアルタイムで受信されます。 + +2014年4月の時点で、Yandex.Metricaは毎日約120億のイベント(ページビューとクリック)を追跡していました。カスタムレポートを作成するには、これらすべてのイベントを保存する必要があります。単一のクエリで、数百ミリ秒以内に数百万行をスキャンしたり、わずか数秒で数億行をスキャンする必要があります。 + +## Yandex.Metricaおよびその他のYandexサービスでの用途 + +ClickHouseは、Yandex.Metricaで複数の目的に使用されます。その主なタスクは、非集計データを使用してオンラインでレポートを作成することです。 374台のサーバーからなるクラスターを使用し、20.3兆行をデータベースに保存します。圧縮されたデータの量は、重複データとレプリケーションを除いて約2PBです。非圧縮データ(TSV形式)の量は約17PBにもなります。 + +ClickHouseは以下の目的にも使用されます。 + +- Yandex.Metricaのデータをセッションリプレイのために保存する。 +- 中間データを処理する。 +- Analyticsを使用したグローバルレポートの作成。 +- Yandex.Metricaエンジンをデバッグするためのクエリの実行。 +- APIおよびユーザーインターフェイスからのログの分析。 + +ClickHouseは少なくとも、そのほか12のYandexのサービス(検索分野、Market、Direct、ビジネス分析、モバイル開発、AdFox、パーソナルサービスなど)で利用されています。 + +## 集約されたデータと非集約データ + +統計を効果的に計算するには、データの量を減らすため、データを集計する必要があるという一般的な意見があります。 + +ただし次の理由により、データ集約は非常に限られた解決策です。 + +- ユーザーが必要とするレポートの事前定義リストが必要です。 +- ユーザーはカスタムレポートを作成できません。 +- 大量のキーを集約する場合、データ量は削減されず、集約は役に立ちません。 +- 多数のレポートの場合、集計のバリエーションが多すぎます(組み合わせ爆発) +- カーディナリティの高いキー(URLなど)を集約する場合、データの量はそれほど減少しません(たかだか半分程度)。 +- このため、集約されたデータの量は減少するどころか増加する場合があります。 +- 生成した全てのレポートをユーザが見るわけではありません。計算の大部分は無駄になっています。 +- データの論理的な整合性は、さまざまな集計に対して違反する可能性があります。 + +何も集約せず、集約されていないデータを操作する場合、実際には計算量が減る可能性があります。 + 
+しかしながら集約は、その大部分がオフラインで実行され、比較的ゆったりと処理します。対照的に、オンライン計算では、ユーザーが結果を待っているため、できるだけ高速に計算する必要があります。 + +Yandex.Metricaには、Metrageと呼ばれるデータを集計するための特別なシステムがあり、これはほとんどのレポートで使用されています。 2009年以降、Yandex.Metricaは、以前にレポートビルダーで使用されていたOLAPServerと呼ばれる非集計データ用の特殊なOLAPデータベースも使用しました。 OLAPServerは非集計データに対してはうまく機能しましたが、多くの制限があり、必要に応じてすべてのレポートに使用することはできませんでした。制限とは、(数値のみしか扱えない)データ型サポートの欠如や、リアルタイムでデータを段階的に更新できないこと(毎日データを書き換えることによってのみ更新可能)がありました。 OLAPServerはDBMSではなく、特殊なDBだったのです。 + +OLAPServerの制限を取り除き、レポートのための非集計データを扱う問題を解決するために、私達は ClickHouse DBMSを開発しました。 + +[Original article](https://clickhouse.yandex/docs/en/introduction/history/) From f1660f8377c784a8f124e427b16bc2125f3c14c6 Mon Sep 17 00:00:00 2001 From: TAC Date: Wed, 19 Feb 2020 13:29:42 +0000 Subject: [PATCH 057/215] Translate features_considered_disadvantages.md via GitLocalize --- docs/ja/introduction/features_considered_disadvantages.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/ja/introduction/features_considered_disadvantages.md b/docs/ja/introduction/features_considered_disadvantages.md index 45d3cdf563a..9a7e9ae5758 120000 --- a/docs/ja/introduction/features_considered_disadvantages.md +++ b/docs/ja/introduction/features_considered_disadvantages.md @@ -1 +1,7 @@ -../../en/introduction/features_considered_disadvantages.md \ No newline at end of file +# 欠点と考えられるClickHouseの機能 + +1. 本格的なトランザクションはありません。 +2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 +3. 
インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 + +[Original article](https://clickhouse.yandex/docs/en/introduction/features_considered_disadvantages/) From a739e4274048d241a57047f687ace2b83a96eade Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Feb 2020 21:41:22 +0300 Subject: [PATCH 058/215] Better build time --- dbms/programs/server/Server.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 25 +++++++++++++++++++++++++ dbms/src/Interpreters/Context.h | 25 ++++--------------------- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index b97ed1cea2a..2ea4fa24644 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -1009,7 +1009,7 @@ int Server::main(const std::vector & /*args*/) global_context->getConfigRef(), graphite_key, async_metrics)); } - SessionCleaner session_cleaner(*global_context); + global_context->createSessionCleaner(); waitForTerminationRequest(); } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f6e280c9e2c..2c19110a8d7 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -2153,6 +2153,26 @@ void Context::resetInputCallbacks() } +class SessionCleaner +{ +public: + SessionCleaner(Context & context_) + : context{context_} + { + } + ~SessionCleaner(); + +private: + void run(); + + Context & context; + + std::mutex mutex; + std::condition_variable cond; + std::atomic quit{false}; + ThreadFromGlobalPool thread{&SessionCleaner::run, this}; +}; + SessionCleaner::~SessionCleaner() { try @@ -2187,5 +2207,10 @@ void SessionCleaner::run() } } +void Context::createSessionCleaner() +{ + session_cleaner = std::make_unique(*this); +} + } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 07fa6b06c1f..74bffcd3d07 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -93,6 +93,7 @@ class 
DiskSelector; class StoragePolicy; using StoragePolicyPtr = std::shared_ptr; class StoragePolicySelector; +class SessionCleaner; class IOutputFormat; using OutputFormatPtr = std::shared_ptr; @@ -186,6 +187,7 @@ private: Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context. Could be equal to this. + std::unique_ptr session_cleaner; /// It will launch a thread to clean old named HTTP sessions. See 'createSessionCleaner'. UInt64 session_close_cycle = 0; bool session_is_used = false; @@ -593,6 +595,8 @@ public: void dropCompiledExpressionCache() const; #endif + void createSessionCleaner(); + /// Add started bridge command. It will be killed after context destruction void addXDBCBridgeCommand(std::unique_ptr cmd) const; @@ -663,25 +667,4 @@ private: std::unique_lock table_lock; }; - -class SessionCleaner -{ -public: - SessionCleaner(Context & context_) - : context{context_} - { - } - ~SessionCleaner(); - -private: - void run(); - - Context & context; - - std::mutex mutex; - std::condition_variable cond; - std::atomic quit{false}; - ThreadFromGlobalPool thread{&SessionCleaner::run, this}; -}; - } From f888c6ac882818c7a5068bc2d47e047c0bfe06dc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Feb 2020 16:21:58 +0300 Subject: [PATCH 059/215] Fixed build --- dbms/src/Interpreters/Context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 74bffcd3d07..1119323dd0f 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -187,7 +187,7 @@ private: Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context. Could be equal to this. - std::unique_ptr session_cleaner; /// It will launch a thread to clean old named HTTP sessions. See 'createSessionCleaner'. 
+ std::shared_ptr session_cleaner; /// It will launch a thread to clean old named HTTP sessions. See 'createSessionCleaner'. UInt64 session_close_cycle = 0; bool session_is_used = false; From 1f275c965f32c4b06ee0a2afd2518d9ed8291417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Rodriguez?= Date: Thu, 20 Feb 2020 16:32:05 +0100 Subject: [PATCH 060/215] Rebase with upstream/master --- base/mysqlxx/CMakeLists.txt | 2 - .../Dictionaries/MySQLDictionarySource.cpp | 3 +- .../ExternalDictionariesLoader.cpp | 34 ----- .../Interpreters/ExternalDictionariesLoader.h | 8 -- libs/libmysqlxx/include/mysqlxx/PoolFactory.h | 51 -------- libs/libmysqlxx/src/PoolFactory.cpp | 123 ------------------ 6 files changed, 1 insertion(+), 220 deletions(-) delete mode 100644 libs/libmysqlxx/include/mysqlxx/PoolFactory.h delete mode 100644 libs/libmysqlxx/src/PoolFactory.cpp diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index b85d3371336..2d2ad75628d 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -8,7 +8,6 @@ add_library (mysqlxx src/Row.cpp src/Value.cpp src/Pool.cpp - src/PoolFactory.cpp src/PoolWithFailover.cpp include/mysqlxx/Connection.h @@ -16,7 +15,6 @@ add_library (mysqlxx include/mysqlxx/mysqlxx.h include/mysqlxx/Null.h include/mysqlxx/Pool.h - include/mysqlxx/PoolFactory.h include/mysqlxx/PoolWithFailover.h include/mysqlxx/Query.h include/mysqlxx/ResultBase.h diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.cpp b/dbms/src/Dictionaries/MySQLDictionarySource.cpp index 823d17a25b9..8df029f6c27 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.cpp +++ b/dbms/src/Dictionaries/MySQLDictionarySource.cpp @@ -46,7 +46,6 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include "readInvalidateQuery.h" -# include namespace DB { @@ -67,7 +66,7 @@ MySQLDictionarySource::MySQLDictionarySource( , update_field{config.getString(config_prefix + ".update_field", "")} , 
dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)} , sample_block{sample_block_} - , pool{mysqlxx::PoolFactory::instance().Get(config, config_prefix)} + , pool{config, config_prefix} , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 72918a8374c..8f4d79a5398 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -19,40 +19,6 @@ ExternalDictionariesLoader::ExternalDictionariesLoader(Context & context_) } -void ExternalDictionariesLoader::reload(const String & name, bool load_never_loading) -{ - #if USE_MYSQL - mysqlxx::PoolFactory::instance().reset(); - #endif - ExternalLoader::reload(name, load_never_loading); -} - -void ExternalDictionariesLoader::reload(bool load_never_loading) -{ - #if USE_MYSQL - mysqlxx::PoolFactory::instance().reset(); - #endif - ExternalLoader::reload(load_never_loading); -} - -void ExternalDictionariesLoader::addConfigRepository( - const std::string & repository_name, std::unique_ptr config_repository) -{ - ExternalLoader::addConfigRepository(repository_name, std::move(config_repository), {"dictionary", "name"}); -} - - -void ExternalDictionariesLoader::addDictionaryWithConfig( - const String & dictionary_name, const String & repo_name, const ASTCreateQuery & query, bool load_never_loading) const -{ - ExternalLoader::addObjectAndLoad( - dictionary_name, /// names are equal - dictionary_name, - repo_name, - getDictionaryConfigurationFromAST(query), - "dictionary", load_never_loading); -} - ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & 
key_in_config, const std::string & repository_name) const diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h index 957bfdb0022..6bfa3ad5e85 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.h +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h @@ -28,14 +28,6 @@ public: return std::static_pointer_cast(tryLoad(name)); } - /// Override ExternalLoader::reload to reset mysqlxx::PoolFactory.h - /// since connection parameters might have changed. Inherited method is called afterward - void reload(const String & name, bool load_never_loading = false); - - /// Override ExternalLoader::reload to reset mysqlxx::PoolFactory.h - /// since connection parameters might have changed. Inherited method is called afterward - void reload(bool load_never_loading = false); - protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config, const std::string & repository_name) const override; diff --git a/libs/libmysqlxx/include/mysqlxx/PoolFactory.h b/libs/libmysqlxx/include/mysqlxx/PoolFactory.h deleted file mode 100644 index 3c553b8b6da..00000000000 --- a/libs/libmysqlxx/include/mysqlxx/PoolFactory.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include -#include -#include -#include "PoolWithFailover.h" - -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 - -namespace mysqlxx -{ -/* - * PoolFactory.h - * This class is a helper singleton to mutualize connections to MySQL. - */ -class PoolFactory final : private boost::noncopyable -{ -public: - static PoolFactory & instance(); - - PoolFactory(const PoolFactory &) = delete; - - /** Allocates a PoolWithFailover to connect to MySQL. 
*/ - PoolWithFailover Get(const std::string & config_name, - unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); - - /** Allocates a PoolWithFailover to connect to MySQL. */ - PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name, - unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); - - void reset(); - - - ~PoolFactory() = default; - PoolFactory& operator=(const PoolFactory &) = delete; - -private: - PoolFactory(); - - struct Impl; - std::unique_ptr impl; -}; - -} diff --git a/libs/libmysqlxx/src/PoolFactory.cpp b/libs/libmysqlxx/src/PoolFactory.cpp deleted file mode 100644 index e721fe5027d..00000000000 --- a/libs/libmysqlxx/src/PoolFactory.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include - -namespace mysqlxx -{ - -struct PoolFactory::Impl -{ - // Cache of already affected pools identified by their config name - std::map> pools; - - // Cache of Pool ID (host + port + user +...) cibling already established shareable pool - std::map pools_by_ids; - - /// Protect pools and pools_by_ids caches - std::mutex mutex; -}; - -PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections, - unsigned max_connections, size_t max_tries) -{ - return Get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries); -} - -/// Duplicate of code from StringUtils.h. Copied here for less dependencies. 
-static bool startsWith(const std::string & s, const char * prefix) -{ - return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); -} - -static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name) -{ - bool shared = config.getBool(config_name + ".share_connection", false); - - // Not shared no need to generate a name the pool won't be stored - if (!shared) - return ""; - - std::string entry_name = ""; - std::string host = config.getString(config_name + ".host", ""); - std::string port = config.getString(config_name + ".port", ""); - std::string user = config.getString(config_name + ".user", ""); - std::string db = config.getString(config_name + ".db", ""); - std::string table = config.getString(config_name + ".table", ""); - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_name, keys); - - if (config.has(config_name + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys replica_keys; - config.keys(config_name, replica_keys); - for (const auto & replica_config_key : replica_keys) - { - /// There could be another elements in the same level in configuration file, like "user", "port"... - if (startsWith(replica_config_key, "replica")) - { - std::string replica_name = config_name + "." + replica_config_key; - std::string tmp_host = config.getString(replica_name + ".host", host); - std::string tmp_port = config.getString(replica_name + ".port", port); - std::string tmp_user = config.getString(replica_name + ".user", user); - entry_name += (entry_name.empty() ? 
"" : "|") + tmp_user + "@" + tmp_host + ":" + tmp_port + "/" + db; - } - } - } - else - { - entry_name = user + "@" + host + ":" + port + "/" + db; - } - return entry_name; -} - -PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries) -{ - - std::lock_guard lock(impl->mutex); - if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) - { - return *(entry->second.get()); - } - else - { - std::string entry_name = getPoolEntryName(config, config_name); - if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) - { - entry = impl->pools.find(id->second); - std::shared_ptr pool = entry->second; - impl->pools.insert_or_assign(config_name, pool); - return *pool; - } - - auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); - // Check the pool will be shared - if (!entry_name.empty()) - { - // Store shared pool - impl->pools.insert_or_assign(config_name, pool); - impl->pools_by_ids.insert_or_assign(entry_name, config_name); - } - auto a2 = *(pool.get()); - return *(pool.get()); - } -} - -void PoolFactory::reset() -{ - std::lock_guard lock(impl->mutex); - impl->pools.clear(); - impl->pools_by_ids.clear(); -} - -PoolFactory::PoolFactory() : impl(std::make_unique()) {} - -PoolFactory & PoolFactory::instance() -{ - static PoolFactory ret; - return ret; -} - -} From d4cfda40502c7a7a27e3cf4f7d18ef75a201a1d0 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 24 Feb 2020 16:44:37 -0400 Subject: [PATCH 061/215] Update query_complexity.md explanation that `max_result_rows result_overflow_mode=break ` depends of max_block_size --- .../operations/settings/query_complexity.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/query_complexity.md 
b/docs/ru/operations/settings/query_complexity.md index abdfd8dcc1f..96af6e0205e 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -108,7 +108,24 @@ ## result_overflow_mode Что делать, если объём результата превысил одно из ограничений: throw или break. По умолчанию: throw. -Использование break по смыслу похоже на LIMIT. + +Использование break по смыслу похоже на LIMIT. Break прерывает выполнение только на уровне блока. Т.е. число строк которые вернет запрос будет кратно [max_block_size](settings.md#max_block_size) и зависит от [max_threads](settings.md#settings-max_threads). + +Пример: +```sql +SET max_threads = 3, max_block_size = 3333; +SET max_result_rows = 3334, result_overflow_mode = 'break'; + +SELECT * +FROM numbers_mt(100000) +FORMAT Null; +``` + +Результат: + +```text +6666 rows in set. ... +``` ## max_execution_time From 7a94d393ca20d91da581f2f6016b64c96393f60b Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Mon, 24 Feb 2020 17:58:20 -0400 Subject: [PATCH 062/215] Update query_complexity.md --- docs/ru/operations/settings/query_complexity.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index 96af6e0205e..3d0c8dd9c38 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -109,7 +109,7 @@ Что делать, если объём результата превысил одно из ограничений: throw или break. По умолчанию: throw. -Использование break по смыслу похоже на LIMIT. Break прерывает выполнение только на уровне блока. Т.е. число строк которые вернет запрос будет кратно [max_block_size](settings.md#max_block_size) и зависит от [max_threads](settings.md#settings-max_threads). +Использование break по смыслу похоже на LIMIT. Break прерывает выполнение только на уровне блока. Т.е. 
число строк которые вернет запрос будет больше чем ограничение [max_result_rows](#max_result_rows), кратно [max_block_size](settings.md#max_block_size) и зависит от [max_threads](settings.md#settings-max_threads). Пример: ```sql From 4e5aff70c73dabbcb944bb2193643a094c1d40ea Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 20 Feb 2020 20:51:44 +0300 Subject: [PATCH 063/215] add perf test for index analyzing with set --- dbms/tests/performance/set_index.xml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 dbms/tests/performance/set_index.xml diff --git a/dbms/tests/performance/set_index.xml b/dbms/tests/performance/set_index.xml new file mode 100644 index 00000000000..f487f3d2f65 --- /dev/null +++ b/dbms/tests/performance/set_index.xml @@ -0,0 +1,22 @@ + + loop + + DROP TABLE IF EXISTS test_in + CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a + INSERT INTO test_in SELECT number FROM numbers(500000000) + + + + 8000 + + + 7000 + 20000 + + + + + SELECT count() FROM test_in WHERE a IN (SELECT rand(1) FROM numbers(100000)) SETTINGS max_rows_to_read = 1, read_overflow_mode = 'break' + + DROP TABLE IF EXISTS test_in + From 0a8b0c86cf86f8c66eef6f1cdea913f86395df30 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 21 Feb 2020 17:33:51 +0300 Subject: [PATCH 064/215] optimization for one-element tuples at 'in' --- dbms/src/Interpreters/Set.cpp | 44 ++++++++++++++++---- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.h | 14 ++++++- dbms/tests/performance/set_index.xml | 1 - 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 23bdcf10729..cd45ecec379 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -517,7 +517,7 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, else { if (invert_left_infinities) - 
left_point.push_back(FieldWithInfinity::getPlusinfinity()); + left_point.push_back(FieldWithInfinity::getPlusInfinity()); else left_point.push_back(FieldWithInfinity::getMinusInfinity()); } @@ -534,7 +534,7 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, if (invert_right_infinities) right_point.push_back(FieldWithInfinity::getMinusInfinity()); else - right_point.push_back(FieldWithInfinity::getPlusinfinity()); + right_point.push_back(FieldWithInfinity::getPlusInfinity()); } } @@ -557,13 +557,41 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, return extract_tuple(i) < rhs; }; + auto compare_one = [this](size_t i, const IColumn & rhs) + { + return ordered_set[0]->compareAt(i, 0, rhs, 1) < 0; + }; + + auto lower_for_field = [&](const FieldWithInfinity & field) + { + if (field == FieldWithInfinity::getMinusInfinity()) + return indices.begin(); + if (field == FieldWithInfinity::getPlusInfinity()) + return indices.end(); + + auto temp_column = ordered_set[0]->cloneEmpty(); + temp_column->insert(field.getFieldIfFinite()); + return std::lower_bound(indices.begin(), indices.end(), *temp_column, compare_one); + }; + + decltype(indices.begin()) left_lower; + decltype(indices.begin()) right_lower; + /** Because each parallelogram maps to a contiguous sequence of elements - * layed out in the lexicographically increasing order, the set intersects the range - * if and only if either bound coincides with an element or at least one element - * is between the lower bounds - */ - auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, compare); - auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, compare); + * layed out in the lexicographically increasing order, the set intersects the range + * if and only if either bound coincides with an element or at least one element + * is between the lower bounds + */ + if (tuple_size == 1) + { + left_lower = 
lower_for_field(left_point[0]); + right_lower = lower_for_field(right_point[0]); + } + else + { + left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, compare); + right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, compare); + } return { diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index f8c7db4a423..d3eed9a015a 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -363,7 +363,7 @@ FieldWithInfinity FieldWithInfinity::getMinusInfinity() return FieldWithInfinity(Type::MINUS_INFINITY); } -FieldWithInfinity FieldWithInfinity::getPlusinfinity() +FieldWithInfinity FieldWithInfinity::getPlusInfinity() { return FieldWithInfinity(Type::PLUS_INFINITY); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index 004cfbc9ea8..d38f0195746 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -15,6 +15,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_TYPE_OF_FIELD; +} + class IFunction; using FunctionBasePtr = std::shared_ptr; @@ -206,11 +211,18 @@ public: FieldWithInfinity(Field && field_); static FieldWithInfinity getMinusInfinity(); - static FieldWithInfinity getPlusinfinity(); + static FieldWithInfinity getPlusInfinity(); bool operator<(const FieldWithInfinity & other) const; bool operator==(const FieldWithInfinity & other) const; + Field getFieldIfFinite() const + { + if (type != NORMAL) + throw Exception("Trying to get field of infinite type", ErrorCodes::BAD_TYPE_OF_FIELD); + return field; + } + private: Field field; Type type; diff --git a/dbms/tests/performance/set_index.xml b/dbms/tests/performance/set_index.xml index f487f3d2f65..4e24b7ccd79 100644 --- a/dbms/tests/performance/set_index.xml +++ b/dbms/tests/performance/set_index.xml @@ -1,7 +1,6 @@ loop - DROP TABLE IF EXISTS test_in 
CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a INSERT INTO test_in SELECT number FROM numbers(500000000) From 1e013c742ac039a39c96626f81907d0d8d1e5eb0 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Tue, 25 Feb 2020 16:00:28 -0400 Subject: [PATCH 065/215] Update settings.md max_insert_threads RU description --- docs/ru/operations/settings/settings.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 298dd7364c3..04039393e51 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -550,6 +550,10 @@ log_query_threads=1 Чем меньше `max_threads`, тем меньше будет использоваться оперативки. +## max_insert_threads {#settings-max_insert_threads} + +Максимальное количество потоков для выполнения запроса `INSERT SELECT`. По умолчанию (0 - auto) определяется автоматически исходя из количества потоков данных которое выдает SELECT. Если параметр max_insert_threads задан (не 0), то будет использовано минимальное значение из заданного и автоматического. + ## max_compress_block_size Максимальный размер блоков не сжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативки. Как правило, не имеет смысла менять эту настройку. 
From c509e9038f14ed3304b2f2dd21bee6ec861f5909 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 26 Feb 2020 13:16:32 +0300 Subject: [PATCH 066/215] trigger tests again --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 04039393e51..f71b24ff269 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -552,7 +552,7 @@ log_query_threads=1 ## max_insert_threads {#settings-max_insert_threads} -Максимальное количество потоков для выполнения запроса `INSERT SELECT`. По умолчанию (0 - auto) определяется автоматически исходя из количества потоков данных которое выдает SELECT. Если параметр max_insert_threads задан (не 0), то будет использовано минимальное значение из заданного и автоматического. +Максимальное количество потоков для выполнения запроса `INSERT SELECT`. По умолчанию (0 - auto) определяется автоматически исходя из количества потоков данных которое выдает SELECT. Если параметр `max_insert_threads` задан (не 0), то будет использовано минимальное значение из заданного и автоматического. 
## max_compress_block_size From d2ab30ec48022cae7589cf26196e1f1df3c1e4c1 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:12:07 +0300 Subject: [PATCH 067/215] implement StorageGenerate --- .../StorageGenerate.cpp} | 190 +++++++++--------- dbms/src/Storages/StorageGenerate.h | 37 ++++ dbms/src/Storages/registerStorages.cpp | 3 +- dbms/src/Storages/registerStorages.h | 1 + .../TableFunctions/TableFunctionGenerate.cpp | 80 ++++++++ ...nctionRandom.h => TableFunctionGenerate.h} | 2 +- .../TableFunctions/registerTableFunctions.cpp | 2 +- .../TableFunctions/registerTableFunctions.h | 2 +- 8 files changed, 219 insertions(+), 98 deletions(-) rename dbms/src/{TableFunctions/TableFunctionRandom.cpp => Storages/StorageGenerate.cpp} (77%) create mode 100644 dbms/src/Storages/StorageGenerate.h create mode 100644 dbms/src/TableFunctions/TableFunctionGenerate.cpp rename dbms/src/TableFunctions/{TableFunctionRandom.h => TableFunctionGenerate.h} (90%) diff --git a/dbms/src/TableFunctions/TableFunctionRandom.cpp b/dbms/src/Storages/StorageGenerate.cpp similarity index 77% rename from dbms/src/TableFunctions/TableFunctionRandom.cpp rename to dbms/src/Storages/StorageGenerate.cpp index 1a6c6bc5e67..781f5f36276 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -1,8 +1,12 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include -#include -#include #include #include #include @@ -16,39 +20,24 @@ #include #include - -#include -#include -#include - +#include #include #include -#include -#include -#include -#include - -#include "registerTableFunctions.h" - - namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int BAD_TYPE_OF_FIELD; - extern const int LOGICAL_ERROR; +extern const int DATABASE_ACCESS_DENIED; +extern const int 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed) { TypeIndex idx = type->getTypeId(); - if (!random_seed) - random_seed = randomSeed(); (void) max_string_length; switch (idx) @@ -328,7 +317,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, data[i] = x; } } - break; + break; case TypeIndex::UUID: { auto & data = typeid_cast &>(column).getData(); @@ -342,7 +331,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, data[i] = x; } } - break; + break; case TypeIndex::Array: { auto & column_array = typeid_cast(column); @@ -380,23 +369,23 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, case TypeIndex::Interval: throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::Nullable: + { + auto & column_nullable = typeid_cast(column); + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto & null_map = column_nullable.getNullMapData(); + IColumn & nested_column = column_nullable.getNestedColumn(); + + fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, random_seed); + + pcg32 generator(random_seed); + null_map.resize(limit); + for (UInt64 i = 0; i < limit; ++i) { - auto & column_nullable = typeid_cast(column); - auto nested_type = typeid_cast(type.get())->getNestedType(); - - auto & null_map = column_nullable.getNullMapData(); - IColumn & nested_column = column_nullable.getNestedColumn(); - - fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, random_seed); - - pcg32 generator(random_seed); - null_map.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - null_map[i] = generator() < 1024; - } - break; + null_map[i] = generator() < 1024; } 
+ break; + } case TypeIndex::Function: throw Exception("Type 'Funclion' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); case TypeIndex::AggregateFunction: @@ -406,72 +395,85 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, } } -StoragePtr TableFunctionRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const +StorageGenerate::StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, + UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_) + : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_), random_seed(random_seed_) { - ASTs & args_func = ast_function->children; + setColumns(columns_); +} - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - ASTs & args = args_func.at(0)->children; - - if (args.size() < 2) - throw Exception("Table function '" + getName() + "' requires at least two arguments: "\ - " structure, limit(, max_array_length, max_string_length, random_seed).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (args.size() > 5) - throw Exception("Table function '" + getName() + "' requires at most five arguments: "\ - " structure, limit, max_array_length, max_string_length, random_seed.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - /// Parsing first argument as table structure and creating a sample block - std::string structure = args[0]->as().value.safeGet(); - - UInt64 limit = 1; - UInt64 max_array_length = 10; - UInt64 max_string_length = 10; - UInt64 random_seed = 0; // zero for random - - /// Parsing second argument if present - if (args.size() >= 2) - limit = args[1]->as().value.safeGet(); - - if (!limit) - throw Exception("Table function '" + getName() + "' limit should not be 0.", ErrorCodes::BAD_ARGUMENTS); - - if (args.size() >= 3) - max_array_length = 
args[1]->as().value.safeGet(); - - if (args.size() >= 4) - max_string_length = args[1]->as().value.safeGet(); - - if (args.size() == 5) - random_seed = args[1]->as().value.safeGet(); - - ColumnsDescription columns = parseColumnsListFromString(structure, context); - - Block res_block; - for (const auto & name_type : columns.getOrdinary()) +void registerStorageGenerate(StorageFactory & factory) +{ + factory.registerStorage("Generate", [](const StorageFactory::Arguments & args) { + ASTs & engine_args = args.engine_args; + + if (engine_args.size() < 1) + throw Exception("Storage Generate requires at least one argument: "\ + " structure(, max_array_length, max_string_length, random_seed).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (engine_args.size() > 5) + throw Exception("Storage Generate requires at most five arguments: "\ + " structure, max_array_length, max_string_length, random_seed.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// Parsing first argument as table structure and creating a sample block + std::string structure = engine_args[0]->as().value.safeGet(); + + UInt64 max_array_length_ = 10; + UInt64 max_string_length_ = 10; + UInt64 random_seed_ = 0; // zero for random + + /// Parsing second argument if present + if (engine_args.size() >= 2) + max_array_length_ = engine_args[1]->as().value.safeGet(); + + if (engine_args.size() >= 3) + max_string_length_ = engine_args[2]->as().value.safeGet(); + + if (engine_args.size() == 4) + random_seed_ = engine_args[3]->as().value.safeGet(); + + /// do not use predefined seed + if (!random_seed_) + random_seed_ = randomSeed(); + + + return StorageGenerate::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_); + }); +} + +Pipes StorageGenerate::read( + const Names & column_names, + const SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned /*num_streams*/) +{ + 
check(column_names, true); + + Pipes pipes; + const ColumnsDescription & columns_ = getColumns(); + + for (const auto & name : column_names) + { + const auto & name_type = columns_.get(name); MutableColumnPtr column = name_type.type->createColumn(); res_block.insert({std::move(column), name_type.type, name_type.name}); } for (auto & ctn : res_block.getColumnsWithTypeAndName()) { - fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, limit, max_array_length, max_string_length, random_seed); + fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, max_block_size, max_array_length, max_string_length, random_seed); } - auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block); - res->startup(); - return res; -} -void registerTableFunctionRandom(TableFunctionFactory & factory) -{ - factory.registerFunction(TableFunctionFactory::CaseInsensitive); + Chunk chunk(res_block.getColumns(), res_block.rows()); + pipes.emplace_back(std::make_shared(res_block.cloneEmpty(), std::move(chunk))); + + return pipes; } } - - diff --git a/dbms/src/Storages/StorageGenerate.h b/dbms/src/Storages/StorageGenerate.h new file mode 100644 index 00000000000..6332b616c9a --- /dev/null +++ b/dbms/src/Storages/StorageGenerate.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + + +namespace DB +{ +/* Generates random data for given schema. 
+ */ +class StorageGenerate : public ext::shared_ptr_helper, public IStorage +{ + friend struct ext::shared_ptr_helper; +public: + std::string getName() const override { return "Generate"; } + + Pipes read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + +private: + Block res_block; + + UInt64 max_array_length = 10; + UInt64 max_string_length = 10; + UInt64 random_seed = 0; + +protected: + StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, + UInt64 max_array_length, UInt64 max_string_length,UInt64 random_seed); +}; + +} diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index c3167dd1c20..e9601577f1d 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -29,8 +29,9 @@ void registerStorages() registerStorageView(factory); registerStorageMaterializedView(factory); registerStorageLiveView(factory); + registerStorageGenerate(factory); - #if USE_AWS_S3 +#if USE_AWS_S3 registerStorageS3(factory); #endif diff --git a/dbms/src/Storages/registerStorages.h b/dbms/src/Storages/registerStorages.h index c6decff5876..ed00225104f 100644 --- a/dbms/src/Storages/registerStorages.h +++ b/dbms/src/Storages/registerStorages.h @@ -23,6 +23,7 @@ void registerStorageJoin(StorageFactory & factory); void registerStorageView(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); void registerStorageLiveView(StorageFactory & factory); +void registerStorageGenerate(StorageFactory & factory); #if USE_AWS_S3 void registerStorageS3(StorageFactory & factory); diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.cpp b/dbms/src/TableFunctions/TableFunctionGenerate.cpp new file mode 100644 index 00000000000..52b65633d8f --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionGenerate.cpp 
@@ -0,0 +1,80 @@ +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; + extern const int BAD_TYPE_OF_FIELD; + extern const int LOGICAL_ERROR; +} + +StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + + ASTs & args = args_func.at(0)->children; + + if (args.size() < 1) + throw Exception("Table function '" + getName() + "' requires at least one argument: "\ + " structure(, max_array_length, max_string_length, random_seed).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (args.size() > 4) + throw Exception("Table function '" + getName() + "' requires at most four arguments: "\ + " structure, max_array_length, max_string_length, random_seed.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// Parsing first argument as table structure and creating a sample block + std::string structure = args[0]->as().value.safeGet(); + + UInt64 max_array_length = 10; + UInt64 max_string_length = 10; + UInt64 random_seed = 0; // zero for random + + /// Parsing second argument if present + if (args.size() >= 2) + max_array_length = args[1]->as().value.safeGet(); + + if (args.size() >= 3) + max_string_length = args[2]->as().value.safeGet(); + + if (args.size() == 4) + random_seed = args[3]->as().value.safeGet(); + + ColumnsDescription columns = parseColumnsListFromString(structure, context); + + auto res = StorageGenerate::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed); + res->startup(); + return res; +} + +void 
registerTableFunctionGenerate(TableFunctionFactory & factory) +{ + factory.registerFunction(TableFunctionFactory::CaseInsensitive); +} + +} + + diff --git a/dbms/src/TableFunctions/TableFunctionRandom.h b/dbms/src/TableFunctions/TableFunctionGenerate.h similarity index 90% rename from dbms/src/TableFunctions/TableFunctionRandom.h rename to dbms/src/TableFunctions/TableFunctionGenerate.h index c4f8e2bca37..0b228fc0e5e 100644 --- a/dbms/src/TableFunctions/TableFunctionRandom.h +++ b/dbms/src/TableFunctions/TableFunctionGenerate.h @@ -7,7 +7,7 @@ namespace DB /* random(structure, limit) - creates a temporary storage filling columns with random data * random is case-insensitive table function */ -class TableFunctionRandom : public ITableFunction +class TableFunctionGenerate : public ITableFunction { public: static constexpr auto name = "generate"; diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 91b6b94440c..b9a96532b13 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -15,7 +15,7 @@ void registerTableFunctions() registerTableFunctionURL(factory); registerTableFunctionValues(factory); registerTableFunctionInput(factory); - registerTableFunctionRandom(factory); + registerTableFunctionGenerate(factory); #if USE_AWS_S3 registerTableFunctionS3(factory); diff --git a/dbms/src/TableFunctions/registerTableFunctions.h b/dbms/src/TableFunctions/registerTableFunctions.h index 8ae5ab339f4..e1be5477c25 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.h +++ b/dbms/src/TableFunctions/registerTableFunctions.h @@ -12,7 +12,7 @@ void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); void registerTableFunctionValues(TableFunctionFactory & factory); void registerTableFunctionInput(TableFunctionFactory & factory); -void 
registerTableFunctionRandom(TableFunctionFactory & factory); +void registerTableFunctionGenerate(TableFunctionFactory & factory); #if USE_AWS_S3 void registerTableFunctionS3(TableFunctionFactory & factory); From f96626cea7e52047564b7227b7e8a686d1dc1132 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:31:22 +0300 Subject: [PATCH 068/215] fix tests --- .../TableFunctions/TableFunctionGenerate.cpp | 2 +- .../01072_random_table_function.reference | 376 +++++++++--------- .../01072_random_table_function.sql | 114 ++++-- 3 files changed, 264 insertions(+), 228 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.cpp b/dbms/src/TableFunctions/TableFunctionGenerate.cpp index 52b65633d8f..cf23977a18a 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerate.cpp @@ -64,7 +64,7 @@ StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const random_seed = args[3]->as().value.safeGet(); ColumnsDescription columns = parseColumnsListFromString(structure, context); - + auto res = StorageGenerate::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed); res->startup(); return res; diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference index 3111e897928..c0ca8469114 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.reference @@ -1,240 +1,238 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -5303502880439445170 5303502880439445170 1234817989 1234817989 54213 -11323 197 -59 -1181548283007126200 1181548283007126200 1171957426 1171957426 42674 -22862 178 -78 -776938933779198124 776938933779198124 275100647 275100647 46055 -19481 231 -25 -3079123348992892438 3079123348992892438 
1033685688 1033685688 51896 -13640 184 -72 -9387607354063286610 -9059136719646265006 180895192 180895192 15832 15832 216 -40 -1372288647685551139 1372288647685551139 135557292 135557292 28844 28844 172 -84 -10000611939545164999 -8446132134164386617 716914271 716914271 15967 15967 95 95 -1156046020585944904 1156046020585944904 1012211222 1012211222 7702 7702 22 22 -8349022212802674610 8349022212802674610 2185722662 -2109244634 31526 31526 38 38 -6331568246612505101 6331568246612505101 2647224658 -1647742638 29010 29010 82 82 +6067806098226807350 6067806098226807350 1412771199 1412771199 11647 11647 127 127 +533919876299229763 533919876299229763 1791099446 1791099446 566 566 54 54 +4640348442306208352 4640348442306208352 124312908 124312908 56652 -8884 76 76 +9035287264038165814 9035287264038165814 1968572995 1968572995 2627 2627 67 67 +3335561882147042384 3335561882147042384 1080415314 1080415314 54354 -11182 82 82 +1967107582972097042 1967107582972097042 2578637408 -1716329888 57952 -7584 96 96 +12006909317836685474 -6439834755872866142 2103691749 2103691749 51685 -13851 229 -27 +1211441240496071423 1211441240496071423 1218125110 1218125110 7478 7478 54 54 +11787857998754938359 -6658886074954613257 776621019 776621019 19419 19419 219 -37 +13395460454860618677 -5051283618848932939 4155847760 -139119536 13392 13392 80 80 - Enum8(\'hello\' = 1, \'world\' = 5) world hello -world -hello -hello hello world hello hello +world +hello +world hello - Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) -['world','hello','world','hello','hello','hello','world','hello','hello'] -['hello','world','world','hello','world','world'] -['hello','world','hello','hello','world','world','world'] -['world','world','world','world','world','world','hello','hello'] -['world','hello'] -['hello','hello'] -['world'] -['hello','hello'] -['hello','hello'] -['hello','world','hello','hello','world','world','world','world'] +['world','hello','hello','world','hello','hello','world','hello','world'] 
+['hello','hello','hello','hello','hello','world'] +['world','world','world','world','world','hello','hello','world'] +['hello','hello','world','hello','hello'] +['hello','world','world','world'] +['world','world','world','hello','hello','hello','world','hello'] +['world','world','hello','hello','hello','world','hello','hello','hello'] +[] +['hello','hello','hello','hello','world','hello','world','world','hello'] +[] - Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) +h +w o w -w -w -h -w +o h h w o +o - Date DateTime DateTime(\'Europe/Moscow\') -2106-02-07 2009-02-16 23:59:49 2009-02-16 23:59:49 -2086-11-02 2007-02-20 10:43:46 2007-02-20 10:43:46 -2096-02-04 1978-09-20 03:50:47 1978-09-20 03:50:47 -2106-02-07 2002-10-04 02:54:48 2002-10-04 02:54:48 -2013-05-07 1975-09-25 19:39:52 1975-09-25 19:39:52 -2048-12-21 1974-04-19 01:48:12 1974-04-19 01:48:12 -2013-09-19 1992-09-19 18:51:11 1992-09-19 18:51:11 -1991-02-02 2002-01-28 12:47:02 2002-01-28 12:47:02 -2056-04-25 2039-04-06 20:11:02 2039-04-06 20:11:02 -2049-06-05 2053-11-20 07:10:58 2053-11-20 07:10:58 +2001-11-21 2014-10-08 16:26:39 2014-10-08 16:26:39 +1971-07-21 2026-10-04 10:37:26 2026-10-04 10:37:26 +2106-02-07 1973-12-09 22:21:48 1973-12-09 22:21:48 +1977-03-12 2032-05-19 12:49:55 2032-05-19 12:49:55 +2106-02-07 2004-03-27 22:21:54 2004-03-27 22:21:54 +2106-02-07 2051-09-18 11:10:08 2051-09-18 11:10:08 +2106-02-07 2036-08-30 09:49:09 2036-08-30 09:49:09 +1990-06-23 2008-08-07 20:05:10 2008-08-07 20:05:10 +2023-03-03 1994-08-11 20:03:39 1994-08-11 20:03:39 +2006-09-01 2101-09-11 05:09:20 2101-09-11 05:09:20 - DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') -2007-02-20 10:43:46.989 2007-02-20 10:43:46.817989 2007-02-20 10:43:46.817989 -2002-10-04 02:54:48.647 2002-10-04 02:54:48.100647 2002-10-04 02:54:48.100647 -1974-04-19 01:48:12.192 1974-04-19 01:48:12.895192 1974-04-19 01:48:12.895192 -2002-01-28 12:47:02.271 2002-01-28 12:47:02.914271 2002-01-28 12:47:02.914271 -2053-11-20 07:10:58.662 
2053-11-20 07:10:58.722662 2053-11-20 07:10:58.722662 -1986-04-08 19:07:15.849 1986-04-08 19:07:15.510849 1986-04-08 19:07:15.510849 -2081-03-06 04:00:55.914 2081-03-06 04:00:55.448914 2081-03-06 04:00:55.448914 -1979-01-20 20:39:20.939 1979-01-20 20:39:20.162939 1979-01-20 20:39:20.162939 -2063-07-18 01:46:10.215 2063-07-18 01:46:10.908215 2063-07-18 01:46:10.908215 -1996-11-02 14:35:41.110 1996-11-02 14:35:41.183110 1996-11-02 14:35:41.183110 +2026-10-04 10:37:26.199 2026-10-04 10:37:26.771199 2026-10-04 10:37:26.771199 +2032-05-19 12:49:55.908 2032-05-19 12:49:55.312908 2032-05-19 12:49:55.312908 +2051-09-18 11:10:08.314 2051-09-18 11:10:08.415314 2051-09-18 11:10:08.415314 +2008-08-07 20:05:10.749 2008-08-07 20:05:10.691749 2008-08-07 20:05:10.691749 +2101-09-11 05:09:20.019 2101-09-11 05:09:20.621019 2101-09-11 05:09:20.621019 +2062-07-10 05:18:58.924 2062-07-10 05:18:58.002924 2062-07-10 05:18:58.002924 +2074-03-12 07:52:50.424 2074-03-12 07:52:50.576424 2074-03-12 07:52:50.576424 +2075-04-13 03:14:55.643 2075-04-13 03:14:55.060643 2075-04-13 03:14:55.060643 +1990-06-27 22:41:59.565 1990-06-27 22:41:59.574565 1990-06-27 22:41:59.574565 +2071-06-01 23:38:13.679 2071-06-01 23:38:13.873679 2071-06-01 23:38:13.873679 - Float32 Float64 -9.783235e37 5.168430093085938e307 -9.285203e37 4.905322146512668e307 -2.1795718e37 1.151455903014231e307 -8.1897013e37 4.326574656543525e307 -1.4331993e37 7.571513877802428e306 -1.0739954e37 5.673859577292225e306 -5.67998e37 3.000702391289156e307 -8.019563e37 4.236691550453344e307 -1.7317079e38 9.148518147657713e307 -2.0973474e38 1.108017190180919e308 +1.1193126e38 5.913267578382113e307 +1.4190551e38 7.49679091079062e307 +9.849083e36 5.2032168334909443e306 +1.5596642e38 8.239620736359643e307 +8.559931e37 4.522165267798421e307 +2.043007e38 1.0793094445811738e308 +1.6667162e38 8.805171157988522e307 +9.650981e37 5.098560704291471e307 +6.1530254e37 3.25060979130477e307 +3.2926016e38 1.7394635336064728e308 - Decimal32(4) Decimal64(8) 
Decimal64(8) -123481.7989 53035028804.39445170 9783236031310378439643.1472294664915640 -117195.7426 11815482830.07126200 1433199368304978416824.6218455877230102 -27510.0647 7769389337.79198124 -16711157663899806765510.0649541344873437 -103368.5688 30791233489.92892438 -15580363795141897721982.8655941091887288 -18089.5192 -90591367196.46265006 15401227599802737909025.7109101500379661 -13555.7292 13722886476.85551139 7984757011464664209957.7603879176325739 -71691.4271 -84461321341.64386617 5816721179226388864892.0657225650146885 -101221.1222 11560460205.85944904 -5775130779420200933943.2970518536587084 --210924.4634 83490222128.02674610 -5061941637011485215525.6417334537314088 --164774.2638 63315682466.12505101 -7557281935671753244601.7559777699677732 +141277.1199 60678060982.26807350 11193126615213725123159.9175630570523203 +179109.9446 5339198762.99229763 8559932011929819709498.3386368958995766 +12431.2908 46403484423.06208352 6153025631377423463914.5986917361218066 +196857.2995 90352872640.38165814 -11879398367401741707407.7196878679186689 +108041.5314 33355618821.47042384 -12283476719506904826004.1868293152015435 +-171632.9888 19671075829.72097042 15831040503634978760989.7361741136110978 +210369.1749 -64398347558.72866142 9875885121509458147096.4410492055471380 +121812.5110 12114412404.96071423 600964137921042469886.3230431219622973 +77662.1019 -66588860749.54613257 -5499852289308439053567.7017000383452336 +-13911.9536 -50512836188.48932939 -1027137932784200042018.0642287160896946 - Tuple(Int32, Int64) -(1234817989,5303502880439445170) -(1171957426,1181548283007126200) -(275100647,776938933779198124) -(1033685688,3079123348992892438) -(180895192,-9059136719646265006) -(135557292,1372288647685551139) -(716914271,-8446132134164386617) -(1012211222,1156046020585944904) -(-2109244634,8349022212802674610) -(-1647742638,6331568246612505101) +(1412771199,6067806098226807350) +(1791099446,533919876299229763) +(124312908,4640348442306208352) 
+(1968572995,9035287264038165814) +(1080415314,3335561882147042384) +(-1716329888,1967107582972097042) +(2103691749,-6439834755872866142) +(1218125110,1211441240496071423) +(776621019,-6658886074954613257) +(-139119536,-5051283618848932939) - Array(Int8) -[-59,-78,-25,-72,-40,-84,95,22,38] -[82,65,35,-110,-57,-69] -[72,119,-78,-58,13,39,-71] -[81,107,-11,-63,-59,69,-80,-122] -[87,-76] -[22,-84] -[-45] -[-40,84] -[-104,-86] -[-36,123,44,60,5,25,-5,-127] +[127,54,76,67,82,96,-27,54,-37] +[80,-20,18,104,-94,99] +[-1,101,-9,79,-75,-106,-124,25] +[-126,76,-7,-28,20] +[50,109,25,61] +[119,75,-17,80,-26,-86,-1,78] +[87,87,-98,-52,-10,-55,90,-116,-110] +[] +[-8,30,126,-82,-83,-112,109,-37,46] +[] - Array(Nullable(Int32)) -[1234817989,1171957426,275100647,1033685688,180895192,135557292,716914271,1012211222,-2109244634] -[-1647742638,319510849,513356835,-1966518382,-786518841,269162939] -[285701960,1943908215,-1343029326,1474183110,846934541,1007818023,-1664171079] -[195050577,371018347,734173429,2001591233,-1812297275,1172704837,-728923984,774864518] -[-462583209,-1520633676] -[-638906858,1986832300] -[378774483] -[-1399152424,-953863084] -[733724312,-23652950] -[371735004,462118779,148602156,-1055384004,-1041274619,247762201,522289659,822210177] +[1412771199,1791099446,124312908,1968572995,1080415314,-1716329888,2103691749,1218125110,776621019] +[-139119536,458002924,-1375243758,-1499390872,-1006911326,282060643] +[-972627201,-1550392731,646512119,-1176093617,-1094550603,1998158230,-1491853180,-1004684263] +[-1087003262,1246511948,-287482375,1245659620,-447199980] +[75852338,-10461331,924798233,-707845059] +[-694178953,-1402336437,497728751,1937926992,-129643034,982571178,2100766207,-1068740018] +[473070167,-1859986345,611197598,-1197393460,640993526,1545236681,654453082,1474541196,293114258] +[] +[-894672648,-272503522,-1437033090,-1467734354,-1327634003,176306576,1752494957,-228452133,1262726958] +[] - Tuple(Int32, Array(Int64)) 
-(1234817989,[5303502880439445170,1181548283007126200,776938933779198124,3079123348992892438,-9059136719646265006,1372288647685551139,-8446132134164386617,1156046020585944904,8349022212802674610]) -(1171957426,[6331568246612505101,4328545451735172025,837735849651948139,3153250869148769217,-7783757525582213563,-3130704671775162746]) -(275100647,[-1986779751559399244,-2744084058313283668,1626824019940122840,-4096810749907976552,-101588646332188196,1984785042821053740,-4532839778647840507]) -(1033685688,[1064130551002268155,3531365820900360285,-6430432825062036096,673694157720662742,1098480112755237234,-5582206543758520198,1822983700066063177,-3760535601444222426]) -(180895192,[7337695457286344221,5872187140729212089]) -(135557292,[5142382871975234625,-3506363600639473930]) -(716914271,[2213685469994781810]) -(1012211222,[-5533057085551498458,1093259139288543534]) -(-2109244634,[2385587103426775499,6630437694997012859]) -(-1647742638,[-8881439430624548159,6128668282898355635,5437491183672208077,3669750475264598009,1788253815182834821,3436179822464488712,6060518060841781539,5049334787152748055]) +(1412771199,[6067806098226807350,533919876299229763,4640348442306208352,9035287264038165814,3335561882147042384,1967107582972097042,-6439834755872866142,1211441240496071423,-6658886074954613257]) +(1791099446,[-5051283618848932939,8582024252886360196,-4315086049182898814,5353728054740737529,5350067333695554836,325783315319644013]) +(124312908,[3971978169720710205,-2981475897813890229,2137728709761854288,-556812590201644886,9022722158833193550,2031820898413239383,2625073697901328844,2753046232662962377]) +(1968572995,[2810854585430947468,1258916155501601016,-1170393712176882306,-6303871046678353491,757230979742233453]) +(1080415314,[-981194438673715410,-5187418663246381396,-4552736212124933548,-595622618346691759]) 
+(-1716329888,[-5756535245380317560,-3316827179903536443,1131631900550093840,-976331946980643879,-5646085236330506185,2420896254251217582,826917553648174074,-3966953861420592496]) +(2103691749,[-5286631142891306752,-7301789732104984637,1910103380304709263,-5245849785367812780,-1186131377522014764,6186892281648022452,-4757957517150938497,681345985121370407,8244427342487150660]) +(1218125110,[]) +(776621019,[-7434954659106758677,3335701458961542184,-5391405201124517858,-1030158175555181024,-4227500256565221046,-2681544507147310992,-5063942269050242472,-4220143753454148778,-4074029704161347282]) +(-139119536,[]) - Nullable(String) -)/VC)%f9 -\0ih|;B -\0J"Z,kd -\0m"m]$35 -\00 -\0( -\0 -\0g -\0> -\0XjbW:s< +G}8MF We +\0RD7LF +\0q_k7J2t +\0TPO- +\0xD: +\0:77y{h< +\0idoXZ4#[ + +\0>i, mD s + - Array(String) -['(|ZVAg2F','\0GXjbW','\0<^guT(','\0y M$lZ0','\03','\0p','\0','\0i','\0P'] -['\0"}YRG%B','\0T3(E^> p','\0JTaj','\0)*3','\0k%=p','\0Yub$81`X'] -['','\0\\p]|]','\05','\0k$C/pnA'] -['\0ryz{*p',''] -['\07`mjt*G',''] -['\0~g'] -['\0k','\0 '] -['\0F','\0&hcmnX','','\0JZ+O|fh\\'] +['','\0au#','\0P?>\\P5o','\0>T:','','\07d','\0','','\0Fl0FG'] +['\0=CKI?m0','\0wk\\:','\0f6','\0XSz~LH'] +['\0;<','\0Mj7D(lyv','\0','\0[','\0p','\0UZisIY(','\0{&^jL3','\0*8#pXRA'] +['\0Hdz-II','\0','\0SHd6k\'W','\0qco~]','\0Uc5=H','\0','\0Q','\0O91 3','\0>OeC./z'] +[] +['\0,xV{H.R','\0~~m','\0WKKk4','\06','\0bt','\04 -\0XjbW:s< +G}8MF We +\0RD7LF +\0q_k7J2t +\0TPO- +\0xD: +\0:77y{h< +\0idoXZ4#[ + +\0>i, mD s + - -[77] -124167.6723 ('2061-04-17 21:59:44.573','b5fd844d-abb8-6b20-d7d0-f86e1120e744') -[32,110] -141397.7312 ('1979-02-09 03:43:48.526','bce332bd-cf45-f2cb-7da5-ddf9370fb1c7') -[68] -67417.0770 ('2080-03-12 14:17:31.269','bebf8e52-1ceb-73c7-2ead-f1eed124f71d') +[127] 141277.1199 ('2026-10-04 10:37:26.199','54352d7f-6ac2-0236-0768-dd4c75560a43') +[] 179109.9446 ('2032-05-19 12:49:55.908','4065d452-99b2-e260-7d63-c9e5489b1d36') +[] 12431.2908 ('2051-09-18 
11:10:08.314','2e4a4bdb-f7b5-3450-1b4c-91ecae077212') +[54] 196857.2995 ('2008-08-07 20:05:10.749','a6a11c68-c3fb-c0a2-10cf-e763c606e2ff') +[] 108041.5314 ('2101-09-11 05:09:20.019','a396e265-2688-fdf7-b9e6-3c4fbec27bb5') +[] -171632.9888 ('2062-07-10 05:18:58.924','77197996-a714-2084-c41d-bc19bf35a582') +[76] 210369.1749 ('2074-03-12 07:52:50.424','4a4c434c-eedd-5df9-4a3f-41e4e5584514') +[] 121812.5110 ('2075-04-13 03:14:55.643','04856a32-ff60-5f6d-371f-4d19d5cf243d') +[67] 77662.1019 ('1990-06-27 22:41:59.565','d69fab77-ac6a-0b4b-1daa-bcef73826b50') +[] -13911.9536 ('2071-06-01 23:38:13.679','f845cde6-3a90-d8aa-7d37-25ffc04c524e') - -[-59,-78,-25,-72,-40,-84,95,22,38] 1234817989 )/VC)%f9 123481.7989 o 5.168430093085938e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4999d3c5-45da-a6b2-1065-b3e73d9ccab8') Ų -[82,65,35,-110,-57,-69] 1171957426 \0ih|;B 117195.7426 w 4.905322146512668e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','0ac83dd8-0814-70ac-2abb-3e5f3c551e16') -[72,119,-78,-58,13,39,-71] 275100647 \0J"Z,kd 27510.0647 w 1.151455903014231e307 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','82477b26-9dc9-7152-130b-59411e993423') ج -[81,107,-11,-63,-59,69,-80,-122] 1033685688 \0m"m]$35 103368.5688 w 4.326574656543525e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','8ac94f92-d11e-acc7-100b-19bb11077748') _ -[87,-76] 180895192 \00 18089.5192 h 7.571513877802428e306 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','73ddaf77-aff2-ffb2-57de-3fc6327b320d') &R -[22,-84] 135557292 \0( 13555.7292 w 5.673859577292225e306 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','3c121527-9cce-c3b9-0ba0-3c51161d4a6b') A# -[-45] 716914271 \0 71691.4271 h 3.000702391289156e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','2bc298f5-774d-dbc1-93fa-89c545e60e45') -[-40,84] 1012211222 \0g 101221.1222 h 4.236691550453344e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 
20:39:20.939','d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4') H -[-104,-86] 2185722662 \0> -210924.4634 w 9.148518147657713e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8') w -[-36,123,44,60,5,25,-5,-127] 2647224658 \0XjbW:s< -164774.2638 o 1.108017190180919e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','c7253454-2bbb-be98-fe97-15aa162839dc') \r +[-59] 1234817989 )/V 123481.7989 o 5.168430093085938e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4999d3c5-45da-a6b2-1065-b3e73d9ccab8') Ų +[-78] 1171957426 \0 117195.7426 w 4.905322146512668e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','0ac83dd8-0814-70ac-2abb-3e5f3c551e16') +[-25,-72] 275100647 \09 27510.0647 w 1.151455903014231e307 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','82477b26-9dc9-7152-130b-59411e993423') ج +[] 1033685688 \0ih 103368.5688 w 4.326574656543525e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','8ac94f92-d11e-acc7-100b-19bb11077748') _ +[-40] 180895192 \0B 18089.5192 h 7.571513877802428e306 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','73ddaf77-aff2-ffb2-57de-3fc6327b320d') &R +[] 135557292 \0J 13555.7292 w 5.673859577292225e306 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','3c121527-9cce-c3b9-0ba0-3c51161d4a6b') A# +[-84,95] 716914271 \0 71691.4271 h 3.000702391289156e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','2bc298f5-774d-dbc1-93fa-89c545e60e45') +[22,38] 1012211222 \0d 101221.1222 h 4.236691550453344e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4') H +[82,65] 2185722662 \0m -210924.4634 w 9.148518147657713e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8') w +[35] 2647224658 \0]$ -164774.2638 o 1.108017190180919e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 
14:35:41.110','c7253454-2bbb-be98-fe97-15aa162839dc') \r - diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01072_random_table_function.sql index dc212a7b8ff..0329fa81bf1 100644 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.sql +++ b/dbms/tests/queries/0_stateless/01072_random_table_function.sql @@ -3,137 +3,174 @@ SELECT toTypeName(ui32), toTypeName(i32), toTypeName(ui16), toTypeName(i16), toTypeName(ui8), toTypeName(i8) -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1); +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') +LIMIT 1; SELECT ui64, i64, ui32, i32, ui16, i16, ui8, i8 -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 10, 1); +FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 1); +FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)') +LIMIT 1; SELECT i -FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 10, 1); +FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 1); +FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))') +LIMIT 1; SELECT i -FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 10, 1); +FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i)s -FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 1); +FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))') +LIMIT 1; SELECT i -FROM generate('i Nullable(Enum16(\'h\' = 1, 
\'w\' = 5 , \'o\' = -200)))', 10, 10, 10, 1); +FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(d), toTypeName(dt), toTypeName(dtm) -FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 1); +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')') +LIMIT 1; SELECT d, dt, dtm -FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 10, 1); +FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) -FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1); +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')') +LIMIT 1; SELECT dt64, dts64, dtms64 -FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 10, 1); +FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(f32), toTypeName(f64) -FROM generate('f32 Float32, f64 Float64', 1); +FROM generate('f32 Float32, f64 Float64') +LIMIT 1; SELECT f32, f64 -FROM generate('f32 Float32, f64 Float64', 10, 10, 10, 1); +FROM generate('f32 Float32, f64 Float64', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(d32), toTypeName(d64), toTypeName(d64) -FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 1); +FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)') +LIMIT 1; SELECT d32, d64, d128 -FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 10, 1); +FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Tuple(Int32, Int64)', 1); +FROM generate('i Tuple(Int32, Int64)') +LIMIT 1; SELECT i -FROM generate('i Tuple(Int32, 
Int64)', 10, 10, 10, 1); +FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Int8)', 1); +FROM generate('i Array(Int8)') +LIMIT 1; SELECT i -FROM generate('i Array(Int8)', 10, 10, 10, 1); +FROM generate('i Array(Int8)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(Int32))', 1); +FROM generate('i Array(Nullable(Int32))') +LIMIT 1; SELECT i -FROM generate('i Array(Nullable(Int32))', 10, 10, 10, 1); +FROM generate('i Array(Nullable(Int32))', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Tuple(Int32, Array(Int64))', 1); +FROM generate('i Tuple(Int32, Array(Int64))') +LIMIT 1; SELECT i -FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 10, 1); +FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Nullable(String)', 1); +FROM generate('i Nullable(String)', 1) +LIMIT 1; SELECT i -FROM generate('i Nullable(String)', 10, 10, 10, 1); +FROM generate('i Nullable(String)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(String)', 1); +FROM generate('i Array(String)') +LIMIT 1; SELECT i -FROM generate('i Array(String)', 10, 10, 10, 1); +FROM generate('i Array(String)', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i UUID', 1); +FROM generate('i UUID') +LIMIT 1; SELECT i -FROM generate('i UUID', 10, 10, 10, 1); +FROM generate('i UUID', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(UUID))', 1); +FROM generate('i Array(Nullable(UUID))') +LIMIT 1; SELECT i -FROM generate('i Array(Nullable(UUID))', 10, 10, 10, 1); +FROM generate('i Array(Nullable(UUID))', 10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i FixedString(4)', 1); +FROM generate('i FixedString(4)') +LIMIT 1; SELECT i -FROM generate('i FixedString(4)', 10, 10, 10, 1); +FROM generate('i FixedString(4)', 
10, 10, 1) +LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i String', 10); +FROM generate('i String') +LIMIT 1; SELECT i -FROM generate('i String', 10, 10, 10, 1); +FROM generate('i String', 10, 10, 1) +LIMIT 10; SELECT '-'; DROP TABLE IF EXISTS test_table; CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Memory; -INSERT INTO test_table SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); +INSERT INTO test_table SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 2, 10, 1) +LIMIT 10; SELECT * FROM test_table; @@ -143,7 +180,8 @@ SELECT '-'; DROP TABLE IF EXISTS test_table_2; CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)) ENGINE=Memory; -INSERT INTO test_table_2 SELECT * FROM generate('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 10, 3, 5, 10); +INSERT INTO test_table_2 SELECT * FROM generate('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 3, 5, 10) +LIMIT 10; SELECT * FROM test_table_2; SELECT '-'; From 1f3c6edee52757d48422987d2353b5bc2478ef2e Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 17:42:03 +0300 Subject: [PATCH 069/215] fix sed with binary results (sed: RE error: illegal byte sequence) --- dbms/tests/clickhouse-test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 7d3d65defc6..aafb71a2ffa 100755 --- a/dbms/tests/clickhouse-test +++ 
b/dbms/tests/clickhouse-test @@ -76,8 +76,8 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. - os.system("sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) - os.system("sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) + os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stdout_file)) + os.system("LC_ALL=C LC_CTYPE=C LANG=C sed -i -e 's/{test_db}/default/g' {file}".format(test_db=args.database, file=stderr_file)) stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else '' stdout = unicode(stdout, errors='replace', encoding='utf-8') From 181dd7dfba911d6e313dd07a91dc8d5d142f794f Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 26 Feb 2020 18:42:45 +0300 Subject: [PATCH 070/215] add perftest --- .../performance/generate_table_function.xml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 dbms/tests/performance/generate_table_function.xml diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml new file mode 100644 index 00000000000..bca2c53d74d --- /dev/null +++ b/dbms/tests/performance/generate_table_function.xml @@ -0,0 +1,30 @@ + + loop + + + + 4000 + 10000 + + + + SELECT COUNT(*) FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000; + SELECT COUNT(*) FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, 
\'world\' = 5)))', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i UUID', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM generate('i String', 10, 10, 1) LIMIT 100000; + From 3e3cafc4874931b0b4081f639457805c847ebd08 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 26 Feb 2020 21:32:15 +0300 Subject: [PATCH 071/215] Implement method "structureEquals" for ColumnAggregateFunction --- dbms/src/Columns/ColumnAggregateFunction.cpp | 15 +++++++++++++++ dbms/src/Columns/ColumnAggregateFunction.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 845ae47e623..e0808f01c85 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ 
b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -157,6 +157,21 @@ void ColumnAggregateFunction::ensureOwnership() } +bool ColumnAggregateFunction::structureEquals(const IColumn & to) const +{ + const auto * to_concrete = typeid_cast(&to); + if (!to_concrete) + return false; + + /// AggregateFunctions must be the same. + + const IAggregateFunction & func_this = *func; + const IAggregateFunction & func_to = *to_concrete->func; + + return typeid(func_this) == typeid(func_to); +} + + void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) { const ColumnAggregateFunction & from_concrete = assert_cast(from); diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index af1825dca87..8e17a28cf4e 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -204,6 +204,8 @@ public: } void getExtremes(Field & min, Field & max) const override; + + bool structureEquals(const IColumn &) const override; }; From 6ced072e294b7680886af5b138006ab3d5b064d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 26 Feb 2020 22:05:02 +0300 Subject: [PATCH 072/215] Added a test --- .../01088_array_slice_of_aggregate_functions.reference | 1 + .../0_stateless/01088_array_slice_of_aggregate_functions.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.reference create mode 100644 dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql diff --git a/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.reference b/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.reference new file mode 100644 index 00000000000..57fed5c75ed --- /dev/null +++ b/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.reference @@ -0,0 +1 @@ +['\0\0\0\0\0'] diff --git 
a/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql b/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql new file mode 100644 index 00000000000..ba525f30228 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql @@ -0,0 +1 @@ +select arraySlice(groupArray(x),1,1) as y from (select uniqState(number) as x from numbers(10) group by number); From 4033fd4118f47bd65100e8ab5bbcef314b7f019c Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 26 Feb 2020 17:21:56 +0300 Subject: [PATCH 073/215] improve performance of index analyzing with 'IN' --- dbms/src/Interpreters/Set.cpp | 111 +++++++++++++++++----------------- dbms/src/Interpreters/Set.h | 36 ++++++++++- 2 files changed, 91 insertions(+), 56 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index cd45ecec379..5cbe59c0548 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -458,9 +458,19 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vectorcloneEmpty()); + right_point.emplace_back(ordered_set[i]->cloneEmpty()); + } + Block block_to_sort; SortDescription sort_description; for (size_t i = 0; i < tuple_size; ++i) @@ -484,13 +494,6 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, { size_t tuple_size = indexes_mapping.size(); - using FieldWithInfinityTuple = std::vector; - - FieldWithInfinityTuple left_point; - FieldWithInfinityTuple right_point; - left_point.reserve(tuple_size); - right_point.reserve(tuple_size); - bool invert_left_infinities = false; bool invert_right_infinities = false; @@ -512,14 +515,14 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, if (!new_range->left_included) invert_left_infinities = true; - left_point.push_back(FieldWithInfinity(new_range->left)); + left_point[i].update(new_range->left); } else { if (invert_left_infinities) - 
left_point.push_back(FieldWithInfinity::getPlusInfinity()); + left_point[i].update(ValueWithInfinity::PLUS_INFINITY); else - left_point.push_back(FieldWithInfinity::getMinusInfinity()); + left_point[i].update(ValueWithInfinity::MINUS_INFINITY); } if (new_range->right_bounded) @@ -527,79 +530,77 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, if (!new_range->right_included) invert_right_infinities = true; - right_point.push_back(FieldWithInfinity(new_range->right)); + right_point[i].update(new_range->right); } else { if (invert_right_infinities) - right_point.push_back(FieldWithInfinity::getMinusInfinity()); + right_point[i].update(ValueWithInfinity::MINUS_INFINITY); else - right_point.push_back(FieldWithInfinity::getPlusInfinity()); + right_point[i].update(ValueWithInfinity::PLUS_INFINITY); } } - /// This allows to construct tuple in 'ordered_set' at specified index for comparison with range. - - auto indices = ext::range(0, ordered_set.at(0)->size()); - - auto extract_tuple = [tuple_size, this](size_t i) + auto compare = [](const IColumn & lhs, const ValueWithInfinity & rhs, size_t row) { - /// Inefficient. 
- FieldWithInfinityTuple res; - res.reserve(tuple_size); - for (size_t j = 0; j < tuple_size; ++j) - res.emplace_back((*ordered_set[j])[i]); - return res; + auto type = rhs.getType(); + if (type != ValueWithInfinity::NORMAL) + return static_cast(type); + + return lhs.compareAt(row, 0, rhs.getColumnIfFinite(), 1); }; - auto compare = [&extract_tuple](size_t i, const FieldWithInfinityTuple & rhs) + auto less = [this, &compare, tuple_size](size_t row, const auto & point) { - return extract_tuple(i) < rhs; + for (size_t i = 0; i < tuple_size; ++i) + { + int res = compare(*ordered_set[i], point[i], row); + if (res) + return res < 0; + } + return false; }; - auto compare_one = [this](size_t i, const IColumn & rhs) + auto equals = [this, &compare, tuple_size](size_t row, const auto & point) { - return ordered_set[0]->compareAt(i, 0, rhs, 1) < 0; + for (size_t i = 0; i < tuple_size; ++i) + if (compare(*ordered_set[i], point[i], row) != 0) + return false; + return true; }; - auto lower_for_field = [&](const FieldWithInfinity & field) - { - if (field == FieldWithInfinity::getMinusInfinity()) - return indices.begin(); - if (field == FieldWithInfinity::getPlusInfinity()) - return indices.end(); - - auto temp_column = ordered_set[0]->cloneEmpty(); - temp_column->insert(field.getFieldIfFinite()); - return std::lower_bound(indices.begin(), indices.end(), *temp_column, compare_one); - }; - - decltype(indices.begin()) left_lower; - decltype(indices.begin()) right_lower; - /** Because each parallelogram maps to a contiguous sequence of elements * layed out in the lexicographically increasing order, the set intersects the range * if and only if either bound coincides with an element or at least one element * is between the lower bounds */ - if (tuple_size == 1) - { - left_lower = lower_for_field(left_point[0]); - right_lower = lower_for_field(right_point[0]); - } - else - { - left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, compare); - right_lower = 
std::lower_bound(indices.begin(), indices.end(), right_point, compare); - } + auto indices = ext::range(0, size()); + auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, less); + auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, less); return { left_lower != right_lower - || (left_lower != indices.end() && extract_tuple(*left_lower) == left_point) - || (right_lower != indices.end() && extract_tuple(*right_lower) == right_point), + || (left_lower != indices.end() && equals(*left_lower, left_point)) + || (right_lower != indices.end() && equals(*right_lower, right_point)), true }; } +void ValueWithInfinity::update(const Field & x) +{ + /// Keep at most one element in column. + if (!column->empty()) + column->popBack(1); + column->insert(x); + type = NORMAL; +} + +const IColumn & ValueWithInfinity::getColumnIfFinite() const +{ + if (type != NORMAL) + throw Exception("Tring to get column of infinite type", ErrorCodes::LOGICAL_ERROR); + return *column; +} + } diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 9d79017adbd..8e6114bd8a7 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -16,7 +16,6 @@ namespace DB { struct Range; -class FieldWithInfinity; class IFunctionBase; using FunctionBasePtr = std::shared_ptr; @@ -180,6 +179,36 @@ using Sets = std::vector; class IFunction; using FunctionPtr = std::shared_ptr; +/** Class that represents single value with possible infinities. + * Single field is stored in column for more optimal inplace comparisons with other regular columns. + * Extracting fields from columns and further their comparison is suboptimal and requires extra copying. 
+ */ +class ValueWithInfinity +{ +public: + enum Type + { + MINUS_INFINITY = -1, + NORMAL = 0, + PLUS_INFINITY = 1 + }; + + ValueWithInfinity(MutableColumnPtr && column_) + : column(std::move(column_)) {} + + void update(const Field & x); + void update(Type type_) { type = type_; } + + const IColumn & getColumnIfFinite() const; + + Type getType() const { return type; } + +private: + MutableColumnPtr column; + Type type; +}; + + /// Class for checkInRange function. class MergeTreeSetIndex { @@ -203,6 +232,11 @@ public: private: Columns ordered_set; std::vector indexes_mapping; + + using ColumnsWithInfinity = std::vector; + + ColumnsWithInfinity left_point; + ColumnsWithInfinity right_point; }; } From 491f454b5415c36067a2fd49899eefe022ee2dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Rodriguez?= Date: Thu, 27 Feb 2020 10:34:06 +0100 Subject: [PATCH 074/215] MySQL connection mutualize connection + integration test --- base/mysqlxx/CMakeLists.txt | 2 + base/mysqlxx/include/mysqlxx/Pool.h | 2 + base/mysqlxx/include/mysqlxx/PoolFactory.h | 51 ++++++++ base/mysqlxx/include/mysqlxx/Value.h | 3 +- base/mysqlxx/src/Pool.cpp | 32 +++-- base/mysqlxx/src/PoolFactory.cpp | 122 ++++++++++++++++++ base/mysqlxx/src/PoolWithFailover.cpp | 5 +- .../Dictionaries/MySQLDictionarySource.cpp | 28 ++-- dbms/src/Dictionaries/MySQLDictionarySource.h | 2 +- .../ExternalDictionariesLoader.cpp | 8 ++ .../Interpreters/ExternalDictionariesLoader.h | 2 + .../configs/dictionaries/mysql_dict1.xml | 36 ++++++ .../configs/remote_servers.xml | 12 ++ .../test_dictionaries_mysql/test.py | 36 +++--- 14 files changed, 299 insertions(+), 42 deletions(-) create mode 100644 base/mysqlxx/include/mysqlxx/PoolFactory.h create mode 100644 base/mysqlxx/src/PoolFactory.cpp create mode 100644 dbms/tests/integration/test_dictionaries_mysql/configs/remote_servers.xml diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index 2d2ad75628d..b85d3371336 100644 --- 
a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -8,6 +8,7 @@ add_library (mysqlxx src/Row.cpp src/Value.cpp src/Pool.cpp + src/PoolFactory.cpp src/PoolWithFailover.cpp include/mysqlxx/Connection.h @@ -15,6 +16,7 @@ add_library (mysqlxx include/mysqlxx/mysqlxx.h include/mysqlxx/Null.h include/mysqlxx/Pool.h + include/mysqlxx/PoolFactory.h include/mysqlxx/PoolWithFailover.h include/mysqlxx/Query.h include/mysqlxx/ResultBase.h diff --git a/base/mysqlxx/include/mysqlxx/Pool.h b/base/mysqlxx/include/mysqlxx/Pool.h index 5261ffab017..db41b059357 100644 --- a/base/mysqlxx/include/mysqlxx/Pool.h +++ b/base/mysqlxx/include/mysqlxx/Pool.h @@ -198,6 +198,8 @@ public: return description; } + void removeConnection(Connection* data); + protected: /// Number of MySQL connections which are created at launch. unsigned default_connections; diff --git a/base/mysqlxx/include/mysqlxx/PoolFactory.h b/base/mysqlxx/include/mysqlxx/PoolFactory.h new file mode 100644 index 00000000000..3c553b8b6da --- /dev/null +++ b/base/mysqlxx/include/mysqlxx/PoolFactory.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include "PoolWithFailover.h" + +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 + +namespace mysqlxx +{ +/* + * PoolFactory.h + * This class is a helper singleton to mutualize connections to MySQL. + */ +class PoolFactory final : private boost::noncopyable +{ +public: + static PoolFactory & instance(); + + PoolFactory(const PoolFactory &) = delete; + + /** Allocates a PoolWithFailover to connect to MySQL. 
*/ + PoolWithFailover Get(const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + /** Allocates a PoolWithFailover to connect to MySQL. */ + PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + void reset(); + + + ~PoolFactory() = default; + PoolFactory& operator=(const PoolFactory &) = delete; + +private: + PoolFactory(); + + struct Impl; + std::unique_ptr impl; +}; + +} diff --git a/base/mysqlxx/include/mysqlxx/Value.h b/base/mysqlxx/include/mysqlxx/Value.h index 4b0c6c7cbfa..1ca83a8d3a6 100644 --- a/base/mysqlxx/include/mysqlxx/Value.h +++ b/base/mysqlxx/include/mysqlxx/Value.h @@ -239,7 +239,8 @@ template <> inline bool Value::get() cons template <> inline char Value::get() const { return getInt(); } template <> inline signed char Value::get() const { return getInt(); } template <> inline unsigned char Value::get() const { return getUInt(); } -template <> inline char8_t Value::get() const { return getUInt(); } +// crodriguez uncomment +//template <> inline char8_t Value::get() const { return getUInt(); } template <> inline short Value::get() const { return getInt(); } template <> inline unsigned short Value::get() const { return getUInt(); } template <> inline int Value::get() const { return getInt(); } diff --git a/base/mysqlxx/src/Pool.cpp b/base/mysqlxx/src/Pool.cpp index 410ac062039..10c599316b2 100644 --- a/base/mysqlxx/src/Pool.cpp +++ b/base/mysqlxx/src/Pool.cpp @@ -21,16 +21,20 @@ void Pool::Entry::incrementRefCount() { if (!data) return; - 
++data->ref_count; - mysql_thread_init(); + ++(data->ref_count); + if(data->ref_count==1) + mysql_thread_init(); } void Pool::Entry::decrementRefCount() { if (!data) return; - --data->ref_count; - mysql_thread_end(); + if (data->ref_count > 0) { + --(data->ref_count); + if (data->ref_count==0) + mysql_thread_end(); + } } @@ -169,14 +173,24 @@ Pool::Entry Pool::tryGet() return Entry(); } +void Pool::removeConnection(Connection* connection) +{ + std::lock_guard lock(mutex); + if (connection) + { + if (connection->ref_count > 0) + { + connection->conn.disconnect(); + connection->ref_count = 0; + } + connections.remove(connection); + } +} + void Pool::Entry::disconnect() { - if (data) - { - decrementRefCount(); - data->conn.disconnect(); - } + pool->removeConnection(data); } diff --git a/base/mysqlxx/src/PoolFactory.cpp b/base/mysqlxx/src/PoolFactory.cpp new file mode 100644 index 00000000000..41e5944ebdc --- /dev/null +++ b/base/mysqlxx/src/PoolFactory.cpp @@ -0,0 +1,122 @@ +#include +#include +#include + +namespace mysqlxx +{ + +struct PoolFactory::Impl +{ + // Cache of already affected pools identified by their config name + std::map> pools; + + // Cache of Pool ID (host + port + user +...) cibling already established shareable pool + std::map pools_by_ids; + + /// Protect pools and pools_by_ids caches + std::mutex mutex; +}; + +PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections, + unsigned max_connections, size_t max_tries) +{ + return Get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries); +} + +/// Duplicate of code from StringUtils.h. Copied here for less dependencies. 
+static bool startsWith(const std::string & s, const char * prefix) +{ + return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); +} + +static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name) +{ + bool shared = config.getBool(config_name + ".share_connection", false); + + // Not shared no need to generate a name the pool won't be stored + if (!shared) + return ""; + + std::string entry_name = ""; + std::string host = config.getString(config_name + ".host", ""); + std::string port = config.getString(config_name + ".port", ""); + std::string user = config.getString(config_name + ".user", ""); + std::string db = config.getString(config_name + ".db", ""); + std::string table = config.getString(config_name + ".table", ""); + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + if (config.has(config_name + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys replica_keys; + config.keys(config_name, replica_keys); + for (const auto & replica_config_key : replica_keys) + { + /// There could be another elements in the same level in configuration file, like "user", "port"... + if (startsWith(replica_config_key, "replica")) + { + std::string replica_name = config_name + "." + replica_config_key; + std::string tmp_host = config.getString(replica_name + ".host", host); + std::string tmp_port = config.getString(replica_name + ".port", port); + std::string tmp_user = config.getString(replica_name + ".user", user); + entry_name += (entry_name.empty() ? 
"" : "|") + tmp_user + "@" + tmp_host + ":" + tmp_port + "/" + db; + } + } + } + else + { + entry_name = user + "@" + host + ":" + port + "/" + db; + } + return entry_name; +} + +PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries) +{ + + std::lock_guard lock(impl->mutex); + if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) + { + return *(entry->second.get()); + } + else + { + std::string entry_name = getPoolEntryName(config, config_name); + if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) + { + entry = impl->pools.find(id->second); + std::shared_ptr pool = entry->second; + impl->pools.insert_or_assign(config_name, pool); + return *pool; + } + + auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); + // Check the pool will be shared + if (!entry_name.empty()) + { + // Store shared pool + impl->pools.insert_or_assign(config_name, pool); + impl->pools_by_ids.insert_or_assign(entry_name, config_name); + } + return *(pool.get()); + } +} + +void PoolFactory::reset() +{ + std::lock_guard lock(impl->mutex); + impl->pools.clear(); + impl->pools_by_ids.clear(); +} + +PoolFactory::PoolFactory() : impl(std::make_unique()) {} + +PoolFactory & PoolFactory::instance() +{ + static PoolFactory ret; + return ret; +} + +} diff --git a/base/mysqlxx/src/PoolWithFailover.cpp b/base/mysqlxx/src/PoolWithFailover.cpp index bcdbcb3df72..8306922b0e5 100644 --- a/base/mysqlxx/src/PoolWithFailover.cpp +++ b/base/mysqlxx/src/PoolWithFailover.cpp @@ -15,6 +15,7 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & cfg const unsigned max_connections, const size_t max_tries) : max_tries(max_tries) { + shareable = cfg.getBool(config_name + ".share_connection", false); if (cfg.has(config_name + ".replica")) { 
Poco::Util::AbstractConfiguration::Keys replica_keys; @@ -48,7 +49,7 @@ PoolWithFailover::PoolWithFailover(const std::string & config_name, const unsign {} PoolWithFailover::PoolWithFailover(const PoolWithFailover & other) - : max_tries{other.max_tries}, config_name{other.config_name} + : max_tries{other.max_tries}, config_name{other.config_name}, shareable{other.shareable} { if (shareable) { @@ -88,7 +89,7 @@ PoolWithFailover::Entry PoolWithFailover::Get() try { - Entry entry = pool->tryGet(); + Entry entry = shareable ? pool->Get() : pool->tryGet(); if (!entry.isNull()) { diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.cpp b/dbms/src/Dictionaries/MySQLDictionarySource.cpp index 8df029f6c27..95d41d8d35a 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.cpp +++ b/dbms/src/Dictionaries/MySQLDictionarySource.cpp @@ -46,6 +46,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include "readInvalidateQuery.h" +# include namespace DB { @@ -66,11 +67,11 @@ MySQLDictionarySource::MySQLDictionarySource( , update_field{config.getString(config_prefix + ".update_field", "")} , dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)} , sample_block{sample_block_} - , pool{config, config_prefix} + , pool{mysqlxx::PoolFactory::instance().Get(config, config_prefix)} , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} - , close_connection{config.getBool(config_prefix + ".close_connection", false)} + , close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} { } @@ -114,19 +115,21 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate() BlockInputStreamPtr MySQLDictionarySource::loadAll() { - last_modification = getLastModification(); + auto 
connection = pool.Get(); + last_modification = getLastModification(connection, false); LOG_TRACE(log, load_all_query); - return std::make_shared(pool.Get(), load_all_query, sample_block, max_block_size, close_connection); + return std::make_shared(connection, load_all_query, sample_block, max_block_size, close_connection); } BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() { - last_modification = getLastModification(); + auto connection = pool.Get(); + last_modification = getLastModification(connection, false); std::string load_update_query = getUpdateFieldAndDate(); LOG_TRACE(log, load_update_query); - return std::make_shared(pool.Get(), load_update_query, sample_block, max_block_size, close_connection); + return std::make_shared(connection, load_update_query, sample_block, max_block_size, close_connection); } BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector & ids) @@ -158,8 +161,8 @@ bool MySQLDictionarySource::isModified() const if (dont_check_update_time) return true; - - return getLastModification() > last_modification; + auto connection = pool.Get(); + return getLastModification(connection, true) > last_modification; } bool MySQLDictionarySource::supportsSelectiveLoad() const @@ -199,7 +202,7 @@ std::string MySQLDictionarySource::quoteForLike(const std::string s) return out.str(); } -LocalDateTime MySQLDictionarySource::getLastModification() const +LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry & connection, bool allow_connection_closure) const { LocalDateTime modification_time{std::time(nullptr)}; @@ -208,7 +211,6 @@ LocalDateTime MySQLDictionarySource::getLastModification() const try { - auto connection = pool.Get(); auto query = connection->query("SHOW TABLE STATUS LIKE " + quoteForLike(table)); LOG_TRACE(log, query.str()); @@ -233,6 +235,11 @@ LocalDateTime MySQLDictionarySource::getLastModification() const ++fetched_rows; } + if (close_connection && allow_connection_closure) + { + 
connection.disconnect(); + } + if (0 == fetched_rows) LOG_ERROR(log, "Cannot find table in SHOW TABLE STATUS result."); @@ -243,7 +250,6 @@ LocalDateTime MySQLDictionarySource::getLastModification() const { tryLogCurrentException("MySQLDictionarySource"); } - /// we suppose failure to get modification time is not an error, therefore return current time return modification_time; } diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.h b/dbms/src/Dictionaries/MySQLDictionarySource.h index 047bd860ee1..95e660d220f 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.h +++ b/dbms/src/Dictionaries/MySQLDictionarySource.h @@ -62,7 +62,7 @@ private: static std::string quoteForLike(const std::string s); - LocalDateTime getLastModification() const; + LocalDateTime getLastModification(mysqlxx::Pool::Entry & connection, bool allow_connection_closure) const; // execute invalidate_query. expects single cell in result std::string doInvalidateQuery(const std::string & request) const; diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 8f4d79a5398..c53537b80cd 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -28,4 +28,12 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( bool dictionary_from_database = !repository_name.empty(); return DictionaryFactory::instance().create(name, config, key_in_config, context, dictionary_from_database); } + +void ExternalDictionariesLoader::resetAll() +{ + #if USE_MYSQL + mysqlxx::PoolFactory::instance().reset(); + #endif +} + } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h index 6bfa3ad5e85..68913ffa166 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.h +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h @@ -28,6 +28,8 @@ public: return std::static_pointer_cast(tryLoad(name)); } + 
static void resetAll(); + protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config, const std::string & repository_name) const override; diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml index 0a3a613dfdc..514c73f3be2 100644 --- a/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml @@ -1,5 +1,41 @@ + + dict0 + + + test + mysql1 + 3306 + root + clickhouse + test0
+ true + true +
+ + + + + + + id + UInt32 + CAST(id AS UNSIGNED) + + + id + Int32 + + + + value + String + (UNDEFINED) + + + 0 +
dict1 diff --git a/dbms/tests/integration/test_dictionaries_mysql/configs/remote_servers.xml b/dbms/tests/integration/test_dictionaries_mysql/configs/remote_servers.xml new file mode 100644 index 00000000000..b2b88a6e3c8 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_mysql/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + instance + 9000 + + + + + diff --git a/dbms/tests/integration/test_dictionaries_mysql/test.py b/dbms/tests/integration/test_dictionaries_mysql/test.py index 78aeb747dba..80424a3471a 100644 --- a/dbms/tests/integration/test_dictionaries_mysql/test.py +++ b/dbms/tests/integration/test_dictionaries_mysql/test.py @@ -9,13 +9,13 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -DICTIONARY_FILES = ['configs/dictionaries/mysql_dict1.xml', 'configs/dictionaries/mysql_dict2.xml'] +CONFIG_FILES = ['configs/dictionaries/mysql_dict1.xml', 'configs/dictionaries/mysql_dict2.xml', 'configs/remote_servers.xml'] cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) -instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES) +instance = cluster.add_instance('instance', main_configs=CONFIG_FILES, with_mysql = True) create_table_mysql_template = """ - CREATE TABLE `test`.`{}` ( + CREATE TABLE IF NOT EXISTS `test`.`{}` ( `id` int(11) NOT NULL, `value` varchar(50) NOT NULL, PRIMARY KEY (`id`) @@ -23,17 +23,19 @@ create_table_mysql_template = """ """ create_clickhouse_dictionary_table_template = """ - CREATE TABLE `test`.`dict_table_{}` (`id` Int32, `value` String) ENGINE = Dictionary({}) - ORDER BY `id` DESC SETTINGS index_granularity = 8192 + CREATE TABLE IF NOT EXISTS `test`.`dict_table_{}` (`id` Int32, `value` String) ENGINE = Dictionary({}) """ @pytest.fixture(scope="module") def started_cluster(): try: + #time.sleep(30) cluster.start() # Create a MySQL database - 
create_mysql_db(get_mysql_conn(), 'test') + mysql_connection = get_mysql_conn() + create_mysql_db(mysql_connection, 'test') + mysql_connection.close() # Create database in ClickHouse instance.query("CREATE DATABASE IF NOT EXISTS test") @@ -54,7 +56,7 @@ def test_load_mysql_dictionaries(started_cluster): for n in range(0, 5): # Create MySQL tables, fill them and create CH dict tables - prepare_tables('test' + n) + prepare_mysql_table('test', str(n)) # Check dictionaries are loaded and have correct number of elements for n in range(0, 100): @@ -62,34 +64,32 @@ def test_load_mysql_dictionaries(started_cluster): if (n % 10) == 0: query("SYSTEM RELOAD DICTIONARIES") - # Check number of rows - assert query("SELECT count() FROM `dict_table_`.{}".format('test' + (n % 5))).rstrip() == '10000' + # Check number of row + assert query("SELECT count() FROM `test`.`dict_table_{}`".format('test' + str(n % 5))).rstrip() == '10000' def create_mysql_db(mysql_connection, name): with mysql_connection.cursor() as cursor: - cursor.execute( - "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) + cursor.execute("CREATE DATABASE IF NOT EXISTS {} DEFAULT CHARACTER SET 'utf8'".format(name)) -def prepare_mysql_table(table_name): +def prepare_mysql_table(table_name, index): mysql_connection = get_mysql_conn() # Create table - create_mysql_table(mysql_connection, table_name) + create_mysql_table(mysql_connection, table_name + str(index)) # Insert rows using CH query = instance.query - query("INSERT INTO `clickhouse_mysql`.{}(id, value) select number, concat('{} value ', toString(number)) from numbers(10000) ".format(table_name, table_name)) - assert query("SELECT count() FROM `clickhouse_mysql`.{}".format(table_name)).rstrip() == '10000' + query("INSERT INTO `clickhouse_mysql`.{}(id, value) select number, concat('{} value ', toString(number)) from numbers(10000) ".format(table_name + str(index), table_name + str(index))) + assert query("SELECT count() FROM 
`clickhouse_mysql`.{}".format(table_name + str(index))).rstrip() == '10000' mysql_connection.close() #Create CH Dictionary tables based on MySQL tables - query(create_clickhouse_dictionary_table_template.format(table_name + n), 'dict' + n) + query(create_clickhouse_dictionary_table_template.format(table_name + str(index), 'dict' + str(index))) def get_mysql_conn(): - conn = pymysql.connect(user='root', password='clickhouse', host='mysql1', port=3308) + conn = pymysql.connect(user='root', password='clickhouse', host='127.0.0.10', port=3308) return conn def create_mysql_table(conn, table_name): with conn.cursor() as cursor: cursor.execute(create_table_mysql_template.format(table_name)) - From 6cd12d5806cae7a4e20ad98aa5a91dd536d35663 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 27 Feb 2020 14:23:55 +0300 Subject: [PATCH 075/215] fix comparator in MergeTreeSetIndex --- dbms/src/Interpreters/Set.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 5cbe59c0548..52207a0ebac 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -544,8 +544,9 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, auto compare = [](const IColumn & lhs, const ValueWithInfinity & rhs, size_t row) { auto type = rhs.getType(); + /// Return inverted infinity sign, because in 'lhs' all values are finite. 
if (type != ValueWithInfinity::NORMAL) - return static_cast(type); + return -static_cast(type); return lhs.compareAt(row, 0, rhs.getColumnIfFinite(), 1); }; @@ -599,7 +600,7 @@ void ValueWithInfinity::update(const Field & x) const IColumn & ValueWithInfinity::getColumnIfFinite() const { if (type != NORMAL) - throw Exception("Tring to get column of infinite type", ErrorCodes::LOGICAL_ERROR); + throw Exception("Trying to get column of infinite type", ErrorCodes::LOGICAL_ERROR); return *column; } From f3a22043f34e2baa1b2a4acdf741bafb69ead78b Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 27 Feb 2020 16:29:37 +0300 Subject: [PATCH 076/215] fix PVS warning --- dbms/src/Interpreters/Set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 8e6114bd8a7..f7a2f18d2ad 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -194,7 +194,7 @@ public: }; ValueWithInfinity(MutableColumnPtr && column_) - : column(std::move(column_)) {} + : column(std::move(column_)), type(NORMAL) {} void update(const Field & x); void update(Type type_) { type = type_; } From f1d864a03d2ec7ace4bf69e4067f950038999712 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 27 Feb 2020 18:03:29 +0300 Subject: [PATCH 077/215] add storage test and fix string generator --- dbms/src/Storages/StorageGenerate.cpp | 77 +++--- .../TableFunctions/TableFunctionGenerate.h | 3 +- .../01072_random_table_function.reference | 238 ------------------ .../01087_storage_generate.reference | 103 ++++++++ .../0_stateless/01087_storage_generate.sql | 17 ++ .../01087_table_function_generate.reference | 238 ++++++++++++++++++ ....sql => 01087_table_function_generate.sql} | 0 7 files changed, 397 insertions(+), 279 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/01072_random_table_function.reference create mode 100644 
dbms/tests/queries/0_stateless/01087_storage_generate.reference create mode 100644 dbms/tests/queries/0_stateless/01087_storage_generate.sql create mode 100644 dbms/tests/queries/0_stateless/01087_table_function_generate.reference rename dbms/tests/queries/0_stateless/{01072_random_table_function.sql => 01087_table_function_generate.sql} (100%) diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp index 781f5f36276..1b72255bc9f 100644 --- a/dbms/src/Storages/StorageGenerate.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -81,10 +81,10 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); + UInt64 a = static_cast(generator()); data[i] = static_cast(a); } break; @@ -128,10 +128,10 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - Int64 a = static_cast(generator()) << 32 | static_cast(generator()); + Int64 a = static_cast(generator()); data[i] = static_cast(a); } break; @@ -155,12 +155,12 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); double d = 1.0; for (UInt64 i = 0; i < limit; ++i) { d = std::numeric_limits::max(); - data[i] = (d / pcg32::max()) * generator(); + data[i] = (d / pcg64::max()) * generator(); } break; } @@ -218,17 +218,29 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, for (UInt64 i = 0; i < limit; ++i) { offset += 1 + 
static_cast(generator()) % max_string_length; - offsets[i] = offset - 1; + offsets[i] = offset; } chars.resize(offset); for (UInt64 i = 0; i < offset; ++i) { - chars[i] = 32 + generator() % 95; + if (offset - i > 5 ) { + UInt32 r = generator(); + chars[i] = 32 + (r & 0x7F) % 95; + chars[i+1] = 32 + ((r >> 7) & 0x7F) % 95; + chars[i+2] = 32 + ((r >> 14) & 0x7F) % 95; + chars[i+3] = 32 + ((r >> 21) & 0x7F) % 95; + chars[i+4] = 32 + (r >> 28); + i+=4; + } + else { + UInt32 r = generator(); + chars[i] = 32 + (r % 95); + } } // add terminating zero char for (auto & i : offsets) { - chars[i] = 0; + chars[i-1] = 0; } } break; @@ -297,7 +309,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); @@ -309,11 +321,10 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - Int128 x = static_cast(generator()) << 96 | static_cast(generator()) << 32 | - static_cast(generator()) << 64 | static_cast(generator()); + Int128 x = static_cast(generator()) << 64 | static_cast(generator()); data[i] = x; } } @@ -322,11 +333,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); + pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); - UInt64 b = static_cast(generator()) << 32 | static_cast(generator()); + UInt64 a = static_cast(generator()); + UInt64 b = static_cast(generator()); auto x = UInt128(a, 
b); data[i] = x; } @@ -397,8 +408,9 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, StorageGenerate::StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_) - : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_), random_seed(random_seed_) + : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { + random_seed = random_seed_ ? random_seed_ : randomSeed(); setColumns(columns_); } @@ -409,37 +421,24 @@ void registerStorageGenerate(StorageFactory & factory) { ASTs & engine_args = args.engine_args; - if (engine_args.size() < 1) - throw Exception("Storage Generate requires at least one argument: "\ - " structure(, max_array_length, max_string_length, random_seed).", + if (engine_args.size() > 3) + throw Exception("Storage Generate requires at most three arguments: "\ + "max_array_length, max_string_length, random_seed.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (engine_args.size() > 5) - throw Exception("Storage Generate requires at most five arguments: "\ - " structure, max_array_length, max_string_length, random_seed.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - /// Parsing first argument as table structure and creating a sample block - std::string structure = engine_args[0]->as().value.safeGet(); - UInt64 max_array_length_ = 10; UInt64 max_string_length_ = 10; UInt64 random_seed_ = 0; // zero for random /// Parsing second argument if present + if (engine_args.size() >= 1) + max_array_length_ = engine_args[0]->as().value.safeGet(); + if (engine_args.size() >= 2) - max_array_length_ = engine_args[1]->as().value.safeGet(); - - if (engine_args.size() >= 3) - max_string_length_ = engine_args[2]->as().value.safeGet(); - - if (engine_args.size() == 4) - random_seed_ = engine_args[3]->as().value.safeGet(); - - /// do not use predefined 
seed - if (!random_seed_) - random_seed_ = randomSeed(); + max_string_length_ = engine_args[1]->as().value.safeGet(); + if (engine_args.size() == 3) + random_seed_ = engine_args[2]->as().value.safeGet(); return StorageGenerate::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_); }); diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.h b/dbms/src/TableFunctions/TableFunctionGenerate.h index 0b228fc0e5e..0002acd54b1 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.h +++ b/dbms/src/TableFunctions/TableFunctionGenerate.h @@ -4,8 +4,7 @@ namespace DB { -/* random(structure, limit) - creates a temporary storage filling columns with random data - * random is case-insensitive table function +/* random(structure(, max_array_length, max_string_length, random_seed)) - creates a temporary storage that generates columns with random data */ class TableFunctionGenerate : public ITableFunction { diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.reference b/dbms/tests/queries/0_stateless/01072_random_table_function.reference deleted file mode 100644 index c0ca8469114..00000000000 --- a/dbms/tests/queries/0_stateless/01072_random_table_function.reference +++ /dev/null @@ -1,238 +0,0 @@ -UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -6067806098226807350 6067806098226807350 1412771199 1412771199 11647 11647 127 127 -533919876299229763 533919876299229763 1791099446 1791099446 566 566 54 54 -4640348442306208352 4640348442306208352 124312908 124312908 56652 -8884 76 76 -9035287264038165814 9035287264038165814 1968572995 1968572995 2627 2627 67 67 -3335561882147042384 3335561882147042384 1080415314 1080415314 54354 -11182 82 82 -1967107582972097042 1967107582972097042 2578637408 -1716329888 57952 -7584 96 96 -12006909317836685474 -6439834755872866142 2103691749 2103691749 51685 -13851 229 -27 -1211441240496071423 1211441240496071423 1218125110 1218125110 7478 7478 54 54 -11787857998754938359 
-6658886074954613257 776621019 776621019 19419 19419 219 -37 -13395460454860618677 -5051283618848932939 4155847760 -139119536 13392 13392 80 80 -- -Enum8(\'hello\' = 1, \'world\' = 5) -world -hello -hello -world -hello -hello -world -hello -world -hello -- -Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) -['world','hello','hello','world','hello','hello','world','hello','world'] -['hello','hello','hello','hello','hello','world'] -['world','world','world','world','world','hello','hello','world'] -['hello','hello','world','hello','hello'] -['hello','world','world','world'] -['world','world','world','hello','hello','hello','world','hello'] -['world','world','hello','hello','hello','world','hello','hello','hello'] -[] -['hello','hello','hello','hello','world','hello','world','world','hello'] -[] -- -Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) -h -w -o -w -o -h -h -w -o -o -- -Date DateTime DateTime(\'Europe/Moscow\') -2001-11-21 2014-10-08 16:26:39 2014-10-08 16:26:39 -1971-07-21 2026-10-04 10:37:26 2026-10-04 10:37:26 -2106-02-07 1973-12-09 22:21:48 1973-12-09 22:21:48 -1977-03-12 2032-05-19 12:49:55 2032-05-19 12:49:55 -2106-02-07 2004-03-27 22:21:54 2004-03-27 22:21:54 -2106-02-07 2051-09-18 11:10:08 2051-09-18 11:10:08 -2106-02-07 2036-08-30 09:49:09 2036-08-30 09:49:09 -1990-06-23 2008-08-07 20:05:10 2008-08-07 20:05:10 -2023-03-03 1994-08-11 20:03:39 1994-08-11 20:03:39 -2006-09-01 2101-09-11 05:09:20 2101-09-11 05:09:20 -- -DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') -2026-10-04 10:37:26.199 2026-10-04 10:37:26.771199 2026-10-04 10:37:26.771199 -2032-05-19 12:49:55.908 2032-05-19 12:49:55.312908 2032-05-19 12:49:55.312908 -2051-09-18 11:10:08.314 2051-09-18 11:10:08.415314 2051-09-18 11:10:08.415314 -2008-08-07 20:05:10.749 2008-08-07 20:05:10.691749 2008-08-07 20:05:10.691749 -2101-09-11 05:09:20.019 2101-09-11 05:09:20.621019 2101-09-11 05:09:20.621019 -2062-07-10 05:18:58.924 2062-07-10 05:18:58.002924 2062-07-10 05:18:58.002924 
-2074-03-12 07:52:50.424 2074-03-12 07:52:50.576424 2074-03-12 07:52:50.576424 -2075-04-13 03:14:55.643 2075-04-13 03:14:55.060643 2075-04-13 03:14:55.060643 -1990-06-27 22:41:59.565 1990-06-27 22:41:59.574565 1990-06-27 22:41:59.574565 -2071-06-01 23:38:13.679 2071-06-01 23:38:13.873679 2071-06-01 23:38:13.873679 -- -Float32 Float64 -1.1193126e38 5.913267578382113e307 -1.4190551e38 7.49679091079062e307 -9.849083e36 5.2032168334909443e306 -1.5596642e38 8.239620736359643e307 -8.559931e37 4.522165267798421e307 -2.043007e38 1.0793094445811738e308 -1.6667162e38 8.805171157988522e307 -9.650981e37 5.098560704291471e307 -6.1530254e37 3.25060979130477e307 -3.2926016e38 1.7394635336064728e308 -- -Decimal32(4) Decimal64(8) Decimal64(8) -141277.1199 60678060982.26807350 11193126615213725123159.9175630570523203 -179109.9446 5339198762.99229763 8559932011929819709498.3386368958995766 -12431.2908 46403484423.06208352 6153025631377423463914.5986917361218066 -196857.2995 90352872640.38165814 -11879398367401741707407.7196878679186689 -108041.5314 33355618821.47042384 -12283476719506904826004.1868293152015435 --171632.9888 19671075829.72097042 15831040503634978760989.7361741136110978 -210369.1749 -64398347558.72866142 9875885121509458147096.4410492055471380 -121812.5110 12114412404.96071423 600964137921042469886.3230431219622973 -77662.1019 -66588860749.54613257 -5499852289308439053567.7017000383452336 --13911.9536 -50512836188.48932939 -1027137932784200042018.0642287160896946 -- -Tuple(Int32, Int64) -(1412771199,6067806098226807350) -(1791099446,533919876299229763) -(124312908,4640348442306208352) -(1968572995,9035287264038165814) -(1080415314,3335561882147042384) -(-1716329888,1967107582972097042) -(2103691749,-6439834755872866142) -(1218125110,1211441240496071423) -(776621019,-6658886074954613257) -(-139119536,-5051283618848932939) -- -Array(Int8) -[127,54,76,67,82,96,-27,54,-37] -[80,-20,18,104,-94,99] -[-1,101,-9,79,-75,-106,-124,25] -[-126,76,-7,-28,20] -[50,109,25,61] 
-[119,75,-17,80,-26,-86,-1,78] -[87,87,-98,-52,-10,-55,90,-116,-110] -[] -[-8,30,126,-82,-83,-112,109,-37,46] -[] -- -Array(Nullable(Int32)) -[1412771199,1791099446,124312908,1968572995,1080415314,-1716329888,2103691749,1218125110,776621019] -[-139119536,458002924,-1375243758,-1499390872,-1006911326,282060643] -[-972627201,-1550392731,646512119,-1176093617,-1094550603,1998158230,-1491853180,-1004684263] -[-1087003262,1246511948,-287482375,1245659620,-447199980] -[75852338,-10461331,924798233,-707845059] -[-694178953,-1402336437,497728751,1937926992,-129643034,982571178,2100766207,-1068740018] -[473070167,-1859986345,611197598,-1197393460,640993526,1545236681,654453082,1474541196,293114258] -[] -[-894672648,-272503522,-1437033090,-1467734354,-1327634003,176306576,1752494957,-228452133,1262726958] -[] -- -Tuple(Int32, Array(Int64)) -(1412771199,[6067806098226807350,533919876299229763,4640348442306208352,9035287264038165814,3335561882147042384,1967107582972097042,-6439834755872866142,1211441240496071423,-6658886074954613257]) -(1791099446,[-5051283618848932939,8582024252886360196,-4315086049182898814,5353728054740737529,5350067333695554836,325783315319644013]) -(124312908,[3971978169720710205,-2981475897813890229,2137728709761854288,-556812590201644886,9022722158833193550,2031820898413239383,2625073697901328844,2753046232662962377]) -(1968572995,[2810854585430947468,1258916155501601016,-1170393712176882306,-6303871046678353491,757230979742233453]) -(1080415314,[-981194438673715410,-5187418663246381396,-4552736212124933548,-595622618346691759]) -(-1716329888,[-5756535245380317560,-3316827179903536443,1131631900550093840,-976331946980643879,-5646085236330506185,2420896254251217582,826917553648174074,-3966953861420592496]) -(2103691749,[-5286631142891306752,-7301789732104984637,1910103380304709263,-5245849785367812780,-1186131377522014764,6186892281648022452,-4757957517150938497,681345985121370407,8244427342487150660]) -(1218125110,[]) 
-(776621019,[-7434954659106758677,3335701458961542184,-5391405201124517858,-1030158175555181024,-4227500256565221046,-2681544507147310992,-5063942269050242472,-4220143753454148778,-4074029704161347282]) -(-139119536,[]) -- -Nullable(String) -G}8MF We -\0RD7LF -\0q_k7J2t -\0TPO- -\0xD: -\0:77y{h< -\0idoXZ4#[ - -\0>i, mD s - -- -Array(String) -['oXZ4#[)w','\0i, mD','\0){A\\H$%','\0e3}K','\0"f3','\0}QNhc1\'','\0$Zk>cmnX','','\0JZ+O|fh\\'] -['','\0au#','\0P?>\\P5o','\0>T:','','\07d','\0','','\0Fl0FG'] -['\0=CKI?m0','\0wk\\:','\0f6','\0XSz~LH'] -['\0;<','\0Mj7D(lyv','\0','\0[','\0p','\0UZisIY(','\0{&^jL3','\0*8#pXRA'] -['\0Hdz-II','\0','\0SHd6k\'W','\0qco~]','\0Uc5=H','\0','\0Q','\0O91 3','\0>OeC./z'] -[] -['\0,xV{H.R','\0~~m','\0WKKk4','\06','\0bt','\04i, mD s - -- -[127] 141277.1199 ('2026-10-04 10:37:26.199','54352d7f-6ac2-0236-0768-dd4c75560a43') -[] 179109.9446 ('2032-05-19 12:49:55.908','4065d452-99b2-e260-7d63-c9e5489b1d36') -[] 12431.2908 ('2051-09-18 11:10:08.314','2e4a4bdb-f7b5-3450-1b4c-91ecae077212') -[54] 196857.2995 ('2008-08-07 20:05:10.749','a6a11c68-c3fb-c0a2-10cf-e763c606e2ff') -[] 108041.5314 ('2101-09-11 05:09:20.019','a396e265-2688-fdf7-b9e6-3c4fbec27bb5') -[] -171632.9888 ('2062-07-10 05:18:58.924','77197996-a714-2084-c41d-bc19bf35a582') -[76] 210369.1749 ('2074-03-12 07:52:50.424','4a4c434c-eedd-5df9-4a3f-41e4e5584514') -[] 121812.5110 ('2075-04-13 03:14:55.643','04856a32-ff60-5f6d-371f-4d19d5cf243d') -[67] 77662.1019 ('1990-06-27 22:41:59.565','d69fab77-ac6a-0b4b-1daa-bcef73826b50') -[] -13911.9536 ('2071-06-01 23:38:13.679','f845cde6-3a90-d8aa-7d37-25ffc04c524e') -- -[-59] 1234817989 )/V 123481.7989 o 5.168430093085938e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4999d3c5-45da-a6b2-1065-b3e73d9ccab8') Ų -[-78] 1171957426 \0 117195.7426 w 4.905322146512668e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','0ac83dd8-0814-70ac-2abb-3e5f3c551e16') -[-25,-72] 275100647 \09 27510.0647 w 1.151455903014231e307 
('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','82477b26-9dc9-7152-130b-59411e993423') ج -[] 1033685688 \0ih 103368.5688 w 4.326574656543525e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','8ac94f92-d11e-acc7-100b-19bb11077748') _ -[-40] 180895192 \0B 18089.5192 h 7.571513877802428e306 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','73ddaf77-aff2-ffb2-57de-3fc6327b320d') &R -[] 135557292 \0J 13555.7292 w 5.673859577292225e306 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','3c121527-9cce-c3b9-0ba0-3c51161d4a6b') A# -[-84,95] 716914271 \0 71691.4271 h 3.000702391289156e307 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','2bc298f5-774d-dbc1-93fa-89c545e60e45') -[22,38] 1012211222 \0d 101221.1222 h 4.236691550453344e307 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','d48d80b0-2e2f-7e86-e46d-8a57a55cf8b4') H -[82,65] 2185722662 \0m -210924.4634 w 9.148518147657713e307 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','d9eb0e16-766c-a7ac-1693-a3d3ac9aa0d8') w -[35] 2647224658 \0]$ -164774.2638 o 1.108017190180919e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','c7253454-2bbb-be98-fe97-15aa162839dc') \r -- diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.reference b/dbms/tests/queries/0_stateless/01087_storage_generate.reference new file mode 100644 index 00000000000..1038fb2baec --- /dev/null +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.reference @@ -0,0 +1,103 @@ +65536 +- +[-59] 123481.7989 ('2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') +[-78] 117195.7426 ('2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') +[-25,-72] 27510.0647 ('1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') +[] 103368.5688 ('2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') +[-40] 18089.5192 ('2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') +[] 13555.7292 
('1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') +[-84,95] 71691.4271 ('2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') +[22,38] 101221.1222 ('1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') +[82,65] -210924.4634 ('2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') +[35] -164774.2638 ('1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') +[-110] 31951.0849 ('2053-05-14 03:43:37.023','2555c8e2-d792-ae98-10bd-fe81c5e641cc') +[] 51335.6835 ('1981-10-04 07:39:07.577','8f493375-6f27-a1fd-b842-4acd2a947bd2') +[] -196651.8382 ('2033-06-05 16:33:53.429','88b7da3c-3764-d8e1-899d-309d5ad39592') +[-57] -78651.8841 ('2007-03-01 02:20:37.021','86d48a21-34e6-a5e1-90d4-82d31ac7241c') +[-69,72] 26916.2939 ('1994-07-22 12:08:38.312','fd121990-fe32-068d-8db5-54d21f0a57de') +[119,-78] 28570.1960 ('2057-11-30 11:13:40.087','613f329c-8a56-3c7e-1204-896646a3e2dd') +[] 194390.8215 ('2032-12-16 20:51:40.438','0ddc325a-2efd-2077-c088-2022848fb2d1') +[-58] -134302.9326 ('2061-10-06 12:01:12.483','8562e417-91f0-0001-9b06-ad439901f8b3') +[] 147418.3110 ('1993-04-02 08:18:32.212','3cbe5bb9-b32b-4bbe-8d62-134814725e48') +[13,39] 84693.4541 ('1981-10-12 14:43:24.346','dbfd2279-6cb4-c759-12d9-087f286a9c4d') +[] 100781.8023 ('1974-09-17 01:22:36.779','df51bec4-2bce-6677-dea6-5b6475de0ba5') +[-71] -166417.1079 ('2073-02-07 14:31:17.292','6a41ce1b-e8dd-72cb-fba0-181c08589288') +[] 19505.0577 ('1986-07-21 04:27:39.201','9b96ad62-efcf-9072-f13b-d52a37f1a3ab') +[81] 37101.8347 ('1977-10-29 19:01:33.177','a0233bd5-395e-83b3-5110-09f07aa65dbe') +[107] 73417.3429 ('2020-01-24 07:07:28.482','1b2c847d-9d2a-9f27-bc34-83cb7f72865b') +[-11,-63] 200159.1233 ('2020-01-30 04:35:18.644','ef21c730-5836-e387-f7f1-40f10d68ca57') +[-59,69] -181229.7275 ('2064-06-03 23:24:18.831','9ddbdfb5-ee6f-de37-dfd5-2eb2abc28aa3') +[-80] 117270.4837 ('2025-07-05 23:52:10.553','14ad622b-6ac9-d754-dbbb-4175b36e7c89') +[] -72892.3984 ('2004-12-18 
09:50:49.468','05e59bb3-0e6d-d0ef-9ab7-b9317d2f2766') +[-122,87] 77486.4518 ('2028-11-09 07:52:54.371','7e8dd973-c940-2376-73aa-e49d91515247') +[-76] -46258.3209 ('2004-11-06 16:14:05.356','37535aa6-da8a-4061-0a54-841921e5c738') +[22] -152063.3676 ('2056-01-04 23:10:33.111','aa562a59-a034-09a8-6e5a-645bae68fc48') +[-84,-45] -63890.6858 ('2105-08-23 19:09:37.313','5bd71372-4206-ec00-3437-e4fb80e102fc') +[-40] 198683.2300 ('2103-03-12 03:53:10.401','e5022161-1839-1090-2ab1-b838c6ffab4f') +[] 37877.4483 ('2082-06-07 23:24:18.812','c85042c0-39f7-11f1-818c-1e23b5e00c2e') +[] -139915.2424 ('2024-02-04 00:07:50.053','f9cb5e7b-e089-fdd2-657e-086d358724be') +[] -95386.3084 ('1981-01-11 00:05:50.229','54ec5688-abd1-90e1-858f-a8a438f0d849') +[84,-104] 73372.4312 ('2076-08-06 12:43:39.780','20b4adfb-1be0-fd03-72ae-b99f240a8966') +[-86,-36] -2365.2950 ('2001-09-19 08:39:23.076','573ee213-d5c4-a84a-dfe3-ada8f7da77a2') +[] 37173.5004 ('2078-03-06 07:00:01.036','c0ef8e39-8c05-7d05-57a1-8619d16ed0a5') +[] 46211.8779 ('1976-07-28 03:24:19.781','4f7a49fa-f019-1cd4-2d52-21498c4b045b') +[] 14860.2156 ('1991-04-27 19:46:05.572','84f42bdf-87a2-0d6d-a9f3-b7b4c1b580e2') +[123] -105538.4004 ('1997-11-23 06:16:09.365','fa7cfe86-97d2-fe17-7f15-92bc9e23152a') +[44,60] -104127.4619 ('2079-08-19 02:49:25.286','942117e0-eb15-8f8b-f99a-bca4a0287384') +[5] 24776.2201 ('2075-06-16 22:44:08.959','f521453f-bf21-e08a-6342-24b51fb90c57') +[] 52228.9659 ('2103-08-12 09:20:19.320','278ec6db-d590-8174-c652-55a3095d6944') +[] 82221.0177 ('2040-04-25 21:25:27.868','f5cd7060-2008-b90d-091b-541c0c78222b') +[] 24698.8893 ('1982-08-27 15:16:17.588','ee758de3-6f8b-8292-4749-7b94ae95e935') +[] -149720.1814 ('2068-04-30 18:40:02.978','980caaa3-0c07-4ba5-6e59-d8d9bf743a00') +[25] 157983.8848 ('2044-07-03 11:35:24.630','0635f169-ad37-2554-cfd5-55bc16422776') +[] 15685.6644 ('1989-02-11 00:41:34.105','5bbcbd57-dcc0-bb3c-6e57-510a128a6ef1') +[] 158034.8118 ('1999-12-16 
05:44:17.565','e8cdefb3-d4e5-359f-196c-b936eecbdd5a') +[] 25575.9831 ('2000-11-22 21:53:04.462','0b0679fa-c5a7-1e42-9314-201bde1d4887') +[] -131521.7038 ('2068-07-22 12:38:12.718','90503482-35b5-ceb1-8b8c-1f39ed0c0794') +[-5,-127] -129970.8743 ('2019-01-12 22:34:10.162','7638ba6e-5cb2-226b-e2d8-1ad44db1a63b') +[93] 175174.8730 ('2067-02-19 09:03:20.655','279568d0-0e13-38b7-d730-da3366ebaf72') +[] 42444.6468 ('2092-09-30 17:53:17.987','49576daf-0b31-fb3c-6199-628ed5d84004') +[106] 110335.2649 ('2069-01-25 06:39:42.982','d8e68488-f870-fa61-b52c-c0fa8be53385') +[-128] -87556.7925 ('2087-08-05 12:06:19.275','d562d3f5-1a2c-4453-f62a-8a17e841aa1c') +[] 185735.8374 ('2011-11-15 14:26:49.202','0dd6dcb7-3157-b1db-3703-17cd7af3fca3') +[68,-42] 170844.0356 ('2054-08-20 04:32:45.269','9bbe3850-5d18-26b7-b063-3bdba428e87b') +[-41,114] 109974.6845 ('2025-09-13 00:57:59.049','5a496c7e-4354-a8b2-bfd7-e3dd39ba7c75') +[-71,122] 136722.5111 ('2080-04-23 02:18:55.233','8e6a19d8-5088-89d8-4d47-98dced3729f7') +[] -158072.5063 ('2009-07-20 22:51:59.310','0b4699b4-3152-6de8-bc97-856990eba662') +[4,73] 119730.4313 ('2069-03-26 07:01:04.910','6fa07110-c24b-2205-ca3e-e95cbc6ab99a') +[-53] -1448.0319 ('2048-05-08 01:42:42.513','aed73313-3fe7-20ab-74fc-1974deb5feea') +[38,36] -81638.8895 ('1977-11-05 20:56:01.160','3cfa66a6-e302-5857-f14e-d1c95f79304e') +[29] -9186.3306 ('2050-10-28 21:34:49.604','00446ede-1f63-f29b-861b-907ef784c10d') +[] 51541.3812 ('2007-04-05 14:53:34.957','bc06c623-b8b4-c509-822d-c4ffabfb3bb1') +[] -74687.7838 ('2022-03-01 23:37:40.261','39d28440-5447-2e11-58d8-3e996d180710') +[23,-71] -128826.5243 ('2043-12-19 01:47:45.951','bc0d838d-111b-1094-87fa-390ae50f798b') +[-7,65] 170699.4470 ('2036-08-08 20:44:43.440','083e90ba-4432-8f42-a768-28c2a5193705') +[-31] 25454.4229 ('2009-01-09 14:19:10.304','1d853ca8-8103-e9e0-0156-6a0c2cf1bebf') +[-10,52] 34800.8750 ('2029-12-27 00:48:20.183','c9b0b807-0a93-9051-eadd-e6c0f8f1870a') +[114,-27] 55543.7780 ('2055-02-27 
10:34:17.324','03f59f39-7653-5537-8075-8af52335f89e') +[] -93103.4677 ('1998-12-07 09:19:18.592','5a7dc701-41c4-1d1c-d606-eec028da641a') +[] 154376.9076 ('2095-09-09 15:30:44.873','7825f0a7-3941-ded3-15ff-30614e66b184') +[38,101] 100087.4363 ('2018-08-10 11:21:25.476','f7188a9c-6256-0e15-b523-05f7381aa04f') +[46] -206787.1260 ('2083-05-07 15:59:28.021','f234dc7e-b529-2fa2-395a-4500cffb908b') +[-44] -88120.2495 ('2016-01-21 05:26:08.114','54bacc39-3b31-1a61-6001-d68df0b86006') +[-53] 142694.1781 ('2101-02-19 06:19:46.823','c1768c3a-2b5d-98ad-4389-21bb557ffdef') +[-12] 20736.1459 ('1970-07-26 11:10:41.972','aa68eaa8-26ed-bdfc-9ebf-a3e838688f16') +[123] 126601.4572 ('2081-05-25 01:26:41.416','af4c25a2-5dde-fc3d-07ee-56a89deca2ca') +[-28,-63] 67277.0765 ('1996-09-26 12:48:08.251','69fb36c6-bffa-3499-1da9-d1488fe076cc') +[85,-77] 85443.0365 ('2018-10-16 22:39:35.338','b9bed16e-b0b3-1718-efab-83350513300c') +[] 88025.4969 ('2014-11-03 20:06:23.407','f66ceccc-b716-0ea6-cc08-0bcabbc32f82') +[] 41636.0286 ('2094-10-08 03:09:52.518','a97f04ea-d317-c160-979d-75077f536c92') +[108] -83533.9131 ('2080-09-15 20:01:54.317','15f62089-2425-dea7-359f-5d0c5fc76a3c') +[] 80004.7959 ('2003-05-06 13:18:36.664','c62de05e-ff2e-776d-358b-1afa4a914d8a') +[-51,-99] -96702.7448 ('2103-01-05 12:14:12.415','927c495e-b2b2-0db2-c514-0cc92ad57275') +[-7,94] 141107.4320 ('2086-04-08 19:12:41.784','f2f7aa7b-625e-e5a4-df9c-a3299732b43b') +[-123] -7862.4477 ('2037-11-17 09:32:32.517','b4d10717-04b8-89b5-ecb0-1886ad3eab71') +[] 117563.9868 ('2009-07-13 18:40:42.823','e5fbd00e-f8e5-fc29-fb38-3cfccae29cca') +[] -207597.6169 ('2054-05-27 09:25:02.389','c94806f0-079b-1b4f-08cb-5665cfd7f0ba') +[87] 37163.8588 ('2061-02-02 04:33:21.217','6a47246b-0b6d-2621-6ff4-e141c473558b') +[8,16] 39929.4977 ('2084-02-16 03:32:41.835','9fefe1e3-5cfe-9b6e-79f8-1bee729f57d9') +[] 57278.1978 ('2060-07-18 15:52:12.277','1b8aa41f-d775-655a-5ef0-0e2516ab73e0') +[35,60] -119194.1294 ('2027-11-17 
12:54:40.246','8e0e1ff5-c982-27df-241b-ccb1f88b0965') +[] -24783.7666 ('2072-11-04 04:24:25.367','bcbe9529-7213-9aa2-9648-74a913f0a940') +[] -194381.9572 ('2094-02-21 01:42:11.385','2a51b55d-6f25-ad4b-398d-f27fb0241859') +- diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.sql b/dbms/tests/queries/0_stateless/01087_storage_generate.sql new file mode 100644 index 00000000000..02a0dc732f4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Generate(); +SELECT COUNT(*) FROM test_table LIMIT 100; + +DROP TABLE IF EXISTS test_table; + +SELECT '-'; + +DROP TABLE IF EXISTS test_table_2; +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Generate(3, 5, 10); + +SELECT * FROM test_table_2 LIMIT 100; + +SELECT '-'; + +DROP TABLE IF EXISTS test_table_2; + diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference new file mode 100644 index 00000000000..4d9fe812c05 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference @@ -0,0 +1,238 @@ +UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 +972365100324636832 972365100324636832 1412771199 1412771199 11647 11647 127 127 +3152476261539479119 3152476261539479119 1791099446 1791099446 566 566 54 54 +4963323010661987954 4963323010661987954 124312908 124312908 56652 -8884 76 76 +5960170508884574548 5960170508884574548 1968572995 1968572995 2627 2627 67 67 +1945837313970117551 1945837313970117551 1080415314 1080415314 54354 -11182 82 82 +5416802321970971533 5416802321970971533 2578637408 -1716329888 57952 -7584 96 96 +14123053850434978662 -4323690223274572954 2103691749 2103691749 51685 -13851 229 -27 +5897270072098692621 5897270072098692621 1218125110 1218125110 7478 7478 54 54 
+14395125118590964207 -4051618955118587409 776621019 776621019 19419 19419 219 -37 +6099213765121594905 6099213765121594905 4155847760 -139119536 13392 13392 80 80 +- +Enum8(\'hello\' = 1, \'world\' = 5) +world +hello +hello +world +hello +hello +world +hello +world +hello +- +Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) +['world','hello','hello','world','hello','hello','world','hello','world'] +['hello','hello','hello','hello','hello','world'] +['world','world','world','world','world','hello','hello','world'] +['hello','hello','world','hello','hello'] +['hello','world','world','world'] +['world','world','world','hello','hello','hello','world','hello'] +['world','world','hello','hello','hello','world','hello','hello','hello'] +[] +['hello','hello','hello','hello','world','hello','world','world','hello'] +[] +- +Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) +h +w +o +w +o +h +h +w +o +o +- +Date DateTime DateTime(\'Europe/Moscow\') +2001-11-21 2014-10-08 16:26:39 2014-10-08 16:26:39 +1971-07-21 2026-10-04 10:37:26 2026-10-04 10:37:26 +2106-02-07 1973-12-09 22:21:48 1973-12-09 22:21:48 +1977-03-12 2032-05-19 12:49:55 2032-05-19 12:49:55 +2106-02-07 2004-03-27 22:21:54 2004-03-27 22:21:54 +2106-02-07 2051-09-18 11:10:08 2051-09-18 11:10:08 +2106-02-07 2036-08-30 09:49:09 2036-08-30 09:49:09 +1990-06-23 2008-08-07 20:05:10 2008-08-07 20:05:10 +2023-03-03 1994-08-11 20:03:39 1994-08-11 20:03:39 +2006-09-01 2101-09-11 05:09:20 2101-09-11 05:09:20 +- +DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') +2026-10-04 10:37:26.199 2026-10-04 10:37:26.771199 2026-10-04 10:37:26.771199 +2032-05-19 12:49:55.908 2032-05-19 12:49:55.312908 2032-05-19 12:49:55.312908 +2051-09-18 11:10:08.314 2051-09-18 11:10:08.415314 2051-09-18 11:10:08.415314 +2008-08-07 20:05:10.749 2008-08-07 20:05:10.691749 2008-08-07 20:05:10.691749 +2101-09-11 05:09:20.019 2101-09-11 05:09:20.621019 2101-09-11 05:09:20.621019 +2062-07-10 05:18:58.924 2062-07-10 05:18:58.002924 
2062-07-10 05:18:58.002924 +2074-03-12 07:52:50.424 2074-03-12 07:52:50.576424 2074-03-12 07:52:50.576424 +2075-04-13 03:14:55.643 2075-04-13 03:14:55.060643 2075-04-13 03:14:55.060643 +1990-06-27 22:41:59.565 1990-06-27 22:41:59.574565 1990-06-27 22:41:59.574565 +2071-06-01 23:38:13.679 2071-06-01 23:38:13.873679 2071-06-01 23:38:13.873679 +- +Float32 Float64 +1.1193126e38 9.476003236390048e306 +1.4190551e38 3.072187108218657e307 +9.849083e36 4.836914127890829e307 +1.5596642e38 5.808373317056589e307 +8.559931e37 1.8962795639737924e307 +2.043007e38 5.278843956528775e307 +1.6667162e38 1.3763359457240068e308 +9.650981e37 5.747074866263327e307 +6.1530254e37 1.4028501451406057e308 +3.2926016e38 5.943875336376322e307 +- +Decimal32(4) Decimal64(8) Decimal64(8) +141277.1199 34587215859.56486735 1793697015189548808800.3321186298199631 +179109.9446 91310388980.88247636 9155714933283527596575.2265933417808212 +12431.2908 54718336176.75535245 3589436293988117804903.9923828992983949 +196857.2995 66269939664.00937485 -7975800700274615666991.6205621121901043 +108041.5314 -29591948098.91367399 -7473917794926308807002.4588259571608039 +-171632.9888 -3616165358.65719375 16693382269433918843239.6450094159824305 +210369.1749 80507399843.63587111 4905995755279397551053.6888361872164391 +121812.5110 -34656882097.63586027 12093397887518858443128.8806284162773013 +77662.1019 -5854815277.05485807 14924696978427516066647.5550554696171025 +-13911.9536 -18527474.08621639 2233902906033696342890.3204716019302329 +- +Tuple(Int32, Int64) +(1412771199,972365100324636832) +(1791099446,3152476261539479119) +(124312908,4963323010661987954) +(1968572995,5960170508884574548) +(1080415314,1945837313970117551) +(-1716329888,5416802321970971533) +(2103691749,-4323690223274572954) +(1218125110,5897270072098692621) +(776621019,-4051618955118587409) +(-139119536,6099213765121594905) +- +Array(Int8) +[127,54,76,67,82,96,-27,54,-37] +[80,-20,18,104,-94,99] +[-1,101,-9,79,-75,-106,-124,25] +[-126,76,-7,-28,20] 
+[50,109,25,61] +[119,75,-17,80,-26,-86,-1,78] +[87,87,-98,-52,-10,-55,90,-116,-110] +[] +[-8,30,126,-82,-83,-112,109,-37,46] +[] +- +Array(Nullable(Int32)) +[1412771199,1791099446,124312908,1968572995,1080415314,-1716329888,2103691749,1218125110,776621019] +[-139119536,458002924,-1375243758,-1499390872,-1006911326,282060643] +[-972627201,-1550392731,646512119,-1176093617,-1094550603,1998158230,-1491853180,-1004684263] +[-1087003262,1246511948,-287482375,1245659620,-447199980] +[75852338,-10461331,924798233,-707845059] +[-694178953,-1402336437,497728751,1937926992,-129643034,982571178,2100766207,-1068740018] +[473070167,-1859986345,611197598,-1197393460,640993526,1545236681,654453082,1474541196,293114258] +[] +[-894672648,-272503522,-1437033090,-1467734354,-1327634003,176306576,1752494957,-228452133,1262726958] +[] +- +Tuple(Int32, Array(Int64)) +(1412771199,[972365100324636832,3152476261539479119,4963323010661987954,5960170508884574548,1945837313970117551,5416802321970971533,-4323690223274572954,5897270072098692621,-4051618955118587409]) +(1791099446,[6099213765121594905,9049500661325627565,6490610108199929265,2659545628039292992,1049436691901089319,6555844131189778155]) +(124312908,[-3618886197108527083,8090694443849478311,-6748112818340381167,1211001191921707623,1583062447640133561,3619956371411508974,6210395553935478118,7999260120568553136]) +(1968572995,[-8213617876379062675,2929679139336823999,-6898532963881595768,2859043124745757119,181602243649873688]) +(1080415314,[7459507076057491211,2235978668844106654,7747732820502187351,-7347147272488840513]) +(-1716329888,[-3574726218986141421,3981338983611724340,8771848629812837701,-6576737000659768488,6769293131084427761,-8327622552204843394,1747121705533891044,4459867665041030928]) +(2103691749,[7974359543614958207,-973852668603249740,8242686802579364489,-1858485842996203811,-7644694849238348395,121039232775176155,2267202361603555760,-7133497827826495528,-4966062503744808160]) +(1218125110,[]) 
+(776621019,[-4132303197660648388,-402394423460281791,3022356521898683902,48062283965211565,36155406469293327,-2534696891053843107,-6385296537632895626,5203636353956926927,5117509146781816226]) +(-139119536,[]) +- +Nullable(String) +-C 2% +\0)X +\0 $n +\0e +\0 +\0 o9 +\08 64 + +\0 99 + +- +Array(String) +['UL L> ','\0)} ','\0 #x ','\0f ','\0 ','\0 `& ','\0& WZ ','','\0 d^ '] +['','\0 %','\0 WP ','\0 ','','\0 '] +['\0 ;','\0 <8','\0 0E =Y','\0 " -I','\0 c','','\0 (9 ','\0- '] +['\0*? ','\0tj k\'','\0','','\0 `#'] +['\0 T\' x','\0 @T','\0 0','\0 G" '] +['\0M ','\0d` A9 ','\0','\0 ','\0m','\0 Hx ','\0 >P ','\08i @l'] +['\0 +m ','\0','\0 ;; 1','\0 v& ','\0x5 ','\0','\0 ','\0 1g','\0 N3 @'] +[] +['\0 ;\' ','\0 ','\0 %4','\0 ','\0 ','\0/ J','\0 43 ','\0T ','\0/) u#'] +[] +- +UUID +0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f +44e143fb-3c07-b672-52b6-c77fded30154 +1b0100ba-41e3-9baf-4b2c-59032b97438d +c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d +c7c5c206-d6ea-d1ef-54a4-c29ab73ed619 +7d9648e7-b8fa-00ad-5a13-478132d105b1 +24e89aa8-6739-a840-0e90-591c73068627 +5afb0980-c265-66eb-cdc7-222d6ac83015 +7047ebba-f7df-f0a7-a259-e3250e8fbe11 +10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9 +- +Array(Nullable(UUID)) +['0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f','44e143fb-3c07-b672-52b6-c77fded30154','1b0100ba-41e3-9baf-4b2c-59032b97438d','c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d','c7c5c206-d6ea-d1ef-54a4-c29ab73ed619','7d9648e7-b8fa-00ad-5a13-478132d105b1','24e89aa8-6739-a840-0e90-591c73068627','5afb0980-c265-66eb-cdc7-222d6ac83015','7047ebba-f7df-f0a7-a259-e3250e8fbe11'] +['10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9','323cab23-f465-1eee-562f-c1da2afd4d66','6f0314b2-a06f-deb0-8e03-5e0f1da0ae6d','28a84fa7-b60b-24bf-a043-7cd1c083cc88','27ad5c92-f7fe-61bf-0285-2e47472a2718','67857e29-dbeb-670b-1f07-cac0a9cdc39e'] 
+['6b8579f3-74ab-0d57-9a09-b07b86d292bf','ce640571-8c0e-4913-3740-8eb628977234','79bbddda-837d-bd45-a4ba-bc8afe81ab58','5df15c3c-cac7-71f1-8c6e-5769104c667e','183f05ef-3db8-e1e4-3de4-a20310057f10','6eaa9dc1-446a-6a7f-f27c-2e2858564bb4','7263e7f9-ae8b-1a89-e635-54f8f41e6edd','95e89689-b6e3-cd95-01ae-048a79733fdb'] +['1f76b889-254e-81b0-9d00-b9853e618fd8','bb150076-d3e7-5b20-c6a7-1c22b739ec3c','fa6a6867-620d-4241-29f1-913e585b2dfe','00aac064-82d7-23ad-0080-73268f4fe50f','dcd2f29b-6372-dd5d-a762-dea138dc0d76'] +['483707b1-a1e9-55cf-4705-0b748cbe15a2','95696803-f67d-78a2-368a-fa0468a6508a','05c19621-f1a0-0e84-6f41-0b3770eca9ab','859e6ea8-1d2c-a3e7-7f0c-47b4f3dcb8a8'] +['eae86216-0177-15bb-b3e7-11879fc325bc','9c92bccb-abf1-9455-bb7b-e6bdc168dd7c','8ce11561-8eb4-77f3-4a8a-94932decef27','d7c09609-7dca-1d83-0a0c-47a3e6fada0c','8d2ec2d8-21a0-c236-a7d8-ce7dba4efacf','b6e36c2a-9b64-c780-b8c2-3ad72f8c2100','311d94bb-98bd-2b9f-08f6-968caf310aa1','30b0db78-8f4e-d9ac-74a6-5569e114ec51'] +['085ad107-02f1-7923-bc64-7e42b4637d62','76ca80a3-4493-0e5d-d3ba-c269e6dcff7a','78b1fc1a-9617-6469-d4c1-e7400e855264','7c6ba877-1533-0897-43cb-dfa9f01faab2','5a93fb7a-59a4-0669-1bc0-3c084d8d91b5','3ed7d592-5110-ddc4-5b09-62d6e599ec08','30dabf5e-4bb4-4f6f-08a3-2da66221aa26','33ad19f4-5c15-0885-5ea7-468d774b73f2','26388262-e01d-1c58-df21-aec00d17a7d8'] +[] +['0a1ce45d-7e7c-1295-6005-6cc519af1ef0','a8285719-a806-6ef2-9d05-27c5668dcf6b','c67242ca-cc6d-ae56-1702-8c45b5d6e2a3','751e5172-2cdd-d4de-0694-a5753a1f80ad','b6e0792c-3f87-80fa-dd3d-5e3bcf883eb7','2f10e4d6-feb0-c82a-8051-44ff033734dc','fdc53bbe-4279-2bc7-cf4d-11340b2d2ab4','4b44f8cf-353f-fee0-69d9-5785e7d7c760','61e7590b-e7d8-62fd-16b2-7e0636d3cbe3'] +[] +- +FixedString(4) +6LC +R`6 +P +hc +eO + +L +2m= +wKP +N +- +String +-C 2% +\0)X +\0 $n +\0e +\0 +\0 o9 +\08 64 + +\0 99 + +- +[127] 141277.1199 ('2026-10-04 10:37:26.199','0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f') +[] 179109.9446 ('2032-05-19 12:49:55.908','44e143fb-3c07-b672-52b6-c77fded30154') 
+[] 12431.2908 ('2051-09-18 11:10:08.314','1b0100ba-41e3-9baf-4b2c-59032b97438d') +[54] 196857.2995 ('2008-08-07 20:05:10.749','c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d') +[] 108041.5314 ('2101-09-11 05:09:20.019','c7c5c206-d6ea-d1ef-54a4-c29ab73ed619') +[] -171632.9888 ('2062-07-10 05:18:58.924','7d9648e7-b8fa-00ad-5a13-478132d105b1') +[76] 210369.1749 ('2074-03-12 07:52:50.424','24e89aa8-6739-a840-0e90-591c73068627') +[] 121812.5110 ('2075-04-13 03:14:55.643','5afb0980-c265-66eb-cdc7-222d6ac83015') +[67] 77662.1019 ('1990-06-27 22:41:59.565','7047ebba-f7df-f0a7-a259-e3250e8fbe11') +[] -13911.9536 ('2071-06-01 23:38:13.679','10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9') +- +[-59] 1234817989 aR 123481.7989 o 4.50418660252953e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') Ų +[-78] 1171957426 \0 117195.7426 w 4.838569580371409e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') +[-25,-72] 275100647 \0 27510.0647 w 1.1548186651907869e308 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') ج +[] 1033685688 \02? 
103368.5688 w 3.7022451508754407e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') _ +[-40] 180895192 \0 18089.5192 h 2.1459523436508332e307 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') &R +[] 135557292 \0 13555.7292 w 8.576968910623855e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') A# +[-84,95] 716914271 \0 71691.4271 h 1.7471947254414614e308 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') +[22,38] 1012211222 \0 101221.1222 h 1.2787095439887414e308 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') H +[82,65] 2185722662 \0h -210924.4634 w 1.7870585909530327e308 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') w +[35] 2647224658 \0 -164774.2638 o 1.763497936342361e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') \r +- diff --git a/dbms/tests/queries/0_stateless/01072_random_table_function.sql b/dbms/tests/queries/0_stateless/01087_table_function_generate.sql similarity index 100% rename from dbms/tests/queries/0_stateless/01072_random_table_function.sql rename to dbms/tests/queries/0_stateless/01087_table_function_generate.sql From 4c0f378931f59bd70e5ec1f00b24ce73bebabf71 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 27 Feb 2020 22:49:15 +0300 Subject: [PATCH 078/215] recanonize on linux --- .../01087_table_function_generate.reference | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference index 4d9fe812c05..6e356185110 100644 --- 
a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference @@ -143,27 +143,27 @@ Tuple(Int32, Array(Int64)) (-139119536,[]) - Nullable(String) --C 2% -\0)X -\0 $n -\0e -\0 -\0 o9 -\08 64 +-CRz!2%=1 +)X$U*B +0?,$n_&! +e;P,& +{<*8 +CT"o98o+ +8)7+64&X\' -\0 99 +pX*997@,L - Array(String) -['UL L> ','\0)} ','\0 #x ','\0f ','\0 ','\0 `& ','\0& WZ ','','\0 d^ '] -['','\0 %','\0 WP ','\0 ','','\0 '] -['\0 ;','\0 <8','\0 0E =Y','\0 " -I','\0 c','','\0 (9 ','\0- '] -['\0*? ','\0tj k\'','\0','','\0 `#'] -['\0 T\' x','\0 @T','\0 0','\0 G" '] -['\0M ','\0d` A9 ','\0','\0 ','\0m','\0 Hx ','\0 >P ','\08i @l'] -['\0 +m ','\0','\0 ;; 1','\0 v& ','\0x5 ','\0','\0 ','\0 1g','\0 N3 @'] +['UL*`+L>~ ',')}e&,t','2a%#x0]/','f\'J&Q','3 +(','|p)`&"/-','&5#-WZq> ','','Gf,d^m3/y'] +['','+%%F','-+WPe($q','$,"N','','e!B'] +['({ ;\'','=$<8j',',0EEm!=Y(','+ "i,#-IK',')co','','-(9(t!','-wz'] +['*?kY','tj~P/k\'H','.','','-n.`#2'] +['%T\'3Y"xM','0+@Tf',',0a','k G"QB$'] +['MH3','d`{K+A9ff',',','Y7','m5','3"Hx>q+(','WI+>P>6','8ip-/@lf'] +['.+msJ,j','Z','#;;Dv-1p','p&v&&m','x5R1!=','\\',',v','@2)1gx',',N3jX+@\\'] [] -['\0 ;\' ','\0 ','\0 %4','\0 ','\0 ','\0/ J','\0 43 ','\0T ','\0/) u#'] +['j&;\'j]*+','D#){','L.-%4E',' 0',')P.','/^D.J{','P*43gf!','T,q','/)RJ.u#6'] [] - UUID @@ -203,15 +203,15 @@ wK N - String --C 2% -\0)X -\0 $n -\0e -\0 -\0 o9 -\08 64 +-CRz!2%=1 +)X$U*B +0?,$n_&! 
+e;P,& +{<*8 +CT"o98o+ +8)7+64&X\' -\0 99 +pX*997@,L - [127] 141277.1199 ('2026-10-04 10:37:26.199','0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f') @@ -225,14 +225,14 @@ String [67] 77662.1019 ('1990-06-27 22:41:59.565','7047ebba-f7df-f0a7-a259-e3250e8fbe11') [] -13911.9536 ('2071-06-01 23:38:13.679','10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9') - -[-59] 1234817989 aR 123481.7989 o 4.50418660252953e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') Ų -[-78] 1171957426 \0 117195.7426 w 4.838569580371409e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') -[-25,-72] 275100647 \0 27510.0647 w 1.1548186651907869e308 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') ج -[] 1033685688 \02? 103368.5688 w 3.7022451508754407e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') _ -[-40] 180895192 \0 18089.5192 h 2.1459523436508332e307 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') &R -[] 135557292 \0 13555.7292 w 8.576968910623855e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') A# -[-84,95] 716914271 \0 71691.4271 h 1.7471947254414614e308 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') -[22,38] 1012211222 \0 101221.1222 h 1.2787095439887414e308 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') H -[82,65] 2185722662 \0h -210924.4634 w 1.7870585909530327e308 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') w -[35] 2647224658 \0 -164774.2638 o 1.763497936342361e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') \r +[-59] 1234817989 aRM8 123481.7989 o 
4.50418660252953e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') Ų +[-78] 1171957426 C 117195.7426 w 4.838569580371409e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') +[-25,-72] 275100647 %5 27510.0647 w 1.1548186651907869e308 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') ج +[] 1033685688 2?E 103368.5688 w 3.7022451508754407e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') _ +[-40] 180895192 (g 18089.5192 h 2.1459523436508332e307 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') &R +[] 135557292 ;( 13555.7292 w 8.576968910623855e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') A# +[-84,95] 716914271 [ 71691.4271 h 1.7471947254414614e308 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') +[22,38] 1012211222 L 101221.1222 h 1.2787095439887414e308 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') H +[82,65] 2185722662 h/ -210924.4634 w 1.7870585909530327e308 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') w +[35] 2647224658 (!f -164774.2638 o 1.763497936342361e308 ('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') \r - From 0a25fe71bd0789352b4897927d981b21fc253d28 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 28 Feb 2020 12:19:11 +0300 Subject: [PATCH 079/215] more docs --- dbms/src/Storages/StorageGenerate.cpp | 13 +++-- .../TableFunctions/TableFunctionGenerate.cpp | 2 - docs/en/operations/table_engines/generate.md | 53 +++++++++++++++++++ 
docs/fa/operations/table_engines/generate.md | 1 + docs/ja/operations/table_engines/generate.md | 1 + docs/ru/operations/table_engines/generate.md | 1 + docs/zh/operations/table_engines/generate.md | 1 + 7 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 docs/en/operations/table_engines/generate.md create mode 120000 docs/fa/operations/table_engines/generate.md create mode 120000 docs/ja/operations/table_engines/generate.md create mode 120000 docs/ru/operations/table_engines/generate.md create mode 120000 docs/zh/operations/table_engines/generate.md diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp index 1b72255bc9f..d9e9440535b 100644 --- a/dbms/src/Storages/StorageGenerate.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -29,9 +29,10 @@ namespace DB namespace ErrorCodes { -extern const int DATABASE_ACCESS_DENIED; +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_TYPE_OF_FIELD; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, @@ -223,7 +224,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, chars.resize(offset); for (UInt64 i = 0; i < offset; ++i) { - if (offset - i > 5 ) { + if (offset - i > 5) + { UInt32 r = generator(); chars[i] = 32 + (r & 0x7F) % 95; chars[i+1] = 32 + ((r >> 7) & 0x7F) % 95; @@ -232,7 +234,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, chars[i+4] = 32 + (r >> 28); i+=4; } - else { + else + { UInt32 r = generator(); chars[i] = 32 + (r % 95); } @@ -398,7 +401,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, break; } case TypeIndex::Function: - throw Exception("Type 'Funclion' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Type 'Function' can not be stored in a table.", 
ErrorCodes::LOGICAL_ERROR); case TypeIndex::AggregateFunction: throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); case TypeIndex::LowCardinality: diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.cpp b/dbms/src/TableFunctions/TableFunctionGenerate.cpp index cf23977a18a..fe3ae5f33df 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerate.cpp @@ -22,8 +22,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int BAD_TYPE_OF_FIELD; extern const int LOGICAL_ERROR; } diff --git a/docs/en/operations/table_engines/generate.md b/docs/en/operations/table_engines/generate.md new file mode 100644 index 00000000000..126acb05626 --- /dev/null +++ b/docs/en/operations/table_engines/generate.md @@ -0,0 +1,53 @@ +# Generate {#table_engines-generate} + +The Generate table engine produces random data for given table schema. + +Usage examples: + +- Use in test to populate reproducible large table. +- Generate random input for fuzzing tests. + +## Usage in ClickHouse Server + +```sql +Generate(max_array_length, max_string_length, random_seed) +``` + +The `max_array_length` and `max_string_length` parameters specify maximum length of all +array columns and strings correspondingly in generated data. + +Generate table engine supports only `SELECT` queries. + +It supports all [DataTypes](../../data_types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`. 
+ +**Example:** + +**1.** Set up the `generate_engine_table` table: + +```sql +CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE=Generate(3, 5, 1) +``` + +**2.** Query the data: + +```sql +SELECT * FROM generate_engine_table LIMIT 3 +``` + +```text +┌─name─┬──────value─┐ +│ c4xJ │ 1412771199 │ +│ r │ 1791099446 │ +│ 7#$ │ 124312908 │ +└──────┴────────────┘ +``` + +## Details of Implementation +- Not supported: + - `ALTER` + - `SELECT ... SAMPLE` + - `INSERT` + - Indices + - Replication + +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/generate/) diff --git a/docs/fa/operations/table_engines/generate.md b/docs/fa/operations/table_engines/generate.md new file mode 120000 index 00000000000..28cd09533e5 --- /dev/null +++ b/docs/fa/operations/table_engines/generate.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/generate.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/generate.md b/docs/ja/operations/table_engines/generate.md new file mode 120000 index 00000000000..28cd09533e5 --- /dev/null +++ b/docs/ja/operations/table_engines/generate.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/generate.md \ No newline at end of file diff --git a/docs/ru/operations/table_engines/generate.md b/docs/ru/operations/table_engines/generate.md new file mode 120000 index 00000000000..28cd09533e5 --- /dev/null +++ b/docs/ru/operations/table_engines/generate.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/generate.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/generate.md b/docs/zh/operations/table_engines/generate.md new file mode 120000 index 00000000000..28cd09533e5 --- /dev/null +++ b/docs/zh/operations/table_engines/generate.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/generate.md \ No newline at end of file From b0b391d472c2a28331a9a67e520b71b7fe214b31 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Feb 2020 19:29:16 +0300 
Subject: [PATCH 080/215] Change endless performance tests (1) --- dbms/tests/performance/array_element.xml | 8 ++++---- dbms/tests/performance/array_join.xml | 14 +++++++------- dbms/tests/performance/base64.xml | 12 ++++++------ dbms/tests/performance/base64_hits.xml | 10 +++++----- dbms/tests/performance/bitCount.xml | 4 ++-- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dbms/tests/performance/array_element.xml b/dbms/tests/performance/array_element.xml index 672683fe146..2d05f9dcf51 100644 --- a/dbms/tests/performance/array_element.xml +++ b/dbms/tests/performance/array_element.xml @@ -1,5 +1,5 @@ - once + loop @@ -9,7 +9,7 @@ - SELECT count() FROM system.numbers WHERE NOT ignore([[1], [2]][number % 2 + 2]) - SELECT count() FROM system.numbers WHERE NOT ignore([[], [2]][number % 2 + 2]) - SELECT count() FROM system.numbers WHERE NOT ignore([[], []][number % 2 + 2]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore([[], []][number % 2 + 2]) diff --git a/dbms/tests/performance/array_join.xml b/dbms/tests/performance/array_join.xml index d2eb213ce03..c30039588e0 100644 --- a/dbms/tests/performance/array_join.xml +++ b/dbms/tests/performance/array_join.xml @@ -1,5 +1,5 @@ - once + loop @@ -9,10 +9,10 @@ - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - 
SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 diff --git a/dbms/tests/performance/base64.xml b/dbms/tests/performance/base64.xml index 651412c2752..3175c7811bf 100644 --- a/dbms/tests/performance/base64.xml +++ b/dbms/tests/performance/base64.xml @@ -1,5 +1,5 @@ - once + loop @@ -24,13 +24,13 @@ table - numbers - numbers_mt + numbers(1000000) + numbers_mt(10000000) - SELECT count() FROM system.{table} WHERE NOT ignore(base64Encode({string})) - SELECT count() FROM system.{table} WHERE base64Decode(base64Encode({string})) != {string} - SELECT count() FROM system.{table} WHERE tryBase64Decode(base64Encode({string})) != {string} + SELECT count() FROM {table} WHERE NOT ignore(base64Encode({string})) + SELECT count() FROM {table} WHERE 
base64Decode(base64Encode({string})) != {string} + SELECT count() FROM {table} WHERE tryBase64Decode(base64Encode({string})) != {string} diff --git a/dbms/tests/performance/base64_hits.xml b/dbms/tests/performance/base64_hits.xml index 7b07f3badb7..63916dcee4e 100644 --- a/dbms/tests/performance/base64_hits.xml +++ b/dbms/tests/performance/base64_hits.xml @@ -1,8 +1,8 @@ - loop + once - hits_100m_single + hits_10m_single @@ -28,7 +28,7 @@ - SELECT count() FROM hits_100m_single WHERE NOT ignore(base64Encode({string})) - SELECT count() FROM hits_100m_single WHERE base64Decode(base64Encode({string})) != {string} - SELECT count() FROM hits_100m_single WHERE tryBase64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_10m_single WHERE NOT ignore(base64Encode({string})) + SELECT count() FROM hits_10m_single WHERE base64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_10m_single WHERE tryBase64Decode(base64Encode({string})) != {string} diff --git a/dbms/tests/performance/bitCount.xml b/dbms/tests/performance/bitCount.xml index 8936f700b51..8a58f501180 100644 --- a/dbms/tests/performance/bitCount.xml +++ b/dbms/tests/performance/bitCount.xml @@ -1,5 +1,5 @@ - once + loop @@ -23,5 +23,5 @@ - SELECT bitCount({expr}) FROM system.numbers + SELECT bitCount({expr}) FROM numbers(1000000) From fa6ff1aa27308d6fddbd72cc1522b08e163ee90c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Feb 2020 19:50:13 +0300 Subject: [PATCH 081/215] Change some endless performance tests (2) --- dbms/tests/performance/bounding_ratio.xml | 7 +++---- .../performance/complex_array_creation.xml | 7 +++---- dbms/tests/performance/conditional.xml | 19 +++++++++---------- dbms/tests/performance/consistent_hashes.xml | 7 +++---- dbms/tests/performance/count.xml | 4 ---- .../performance/cryptographic_hashes.xml | 9 ++++----- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/dbms/tests/performance/bounding_ratio.xml 
b/dbms/tests/performance/bounding_ratio.xml index efa2c10d989..4bf50f57290 100644 --- a/dbms/tests/performance/bounding_ratio.xml +++ b/dbms/tests/performance/bounding_ratio.xml @@ -1,14 +1,13 @@ - once + loop - 2000 10000 - SELECT boundingRatio(number, number) FROM system.numbers - SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM system.numbers + SELECT boundingRatio(number, number) FROM numbers(1000000) + SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(1000000) diff --git a/dbms/tests/performance/complex_array_creation.xml b/dbms/tests/performance/complex_array_creation.xml index a5ff824d6de..76e4910a1d7 100644 --- a/dbms/tests/performance/complex_array_creation.xml +++ b/dbms/tests/performance/complex_array_creation.xml @@ -1,14 +1,13 @@ - once + loop - 1000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore([[number], [number]]) - SELECT count() FROM system.numbers WHERE NOT ignore([[], [number]]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore([[number], [number]]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore([[], [number]]) diff --git a/dbms/tests/performance/conditional.xml b/dbms/tests/performance/conditional.xml index 96f48fb401a..eea43d6556a 100644 --- a/dbms/tests/performance/conditional.xml +++ b/dbms/tests/performance/conditional.xml @@ -1,20 +1,19 @@ - once + loop - 3000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) - SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) - SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) - SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + SELECT count() FROM numbers(1000000) 
WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) - SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand()))) - SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, toDateTime(rand()), toDate(rand()))) - SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())])) - SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand()))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime(rand()), toDate(rand()))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())])) diff --git a/dbms/tests/performance/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes.xml index 7219aa00c1a..aee232ddce6 100644 --- a/dbms/tests/performance/consistent_hashes.xml +++ b/dbms/tests/performance/consistent_hashes.xml @@ -1,9 +1,8 @@ - once + loop - 6000 15000 @@ -27,6 +26,6 @@ - SELECT {hash_func}(number, {buckets}) FROM system.numbers - SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM system.numbers + SELECT {hash_func}(number, {buckets}) FROM numbers(1000000) + SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM numbers(1000000) diff --git 
a/dbms/tests/performance/count.xml b/dbms/tests/performance/count.xml index 0244adf4b38..3bb4a0d2cd5 100644 --- a/dbms/tests/performance/count.xml +++ b/dbms/tests/performance/count.xml @@ -6,10 +6,6 @@ 30000 - - 6000 - 60000 - diff --git a/dbms/tests/performance/cryptographic_hashes.xml b/dbms/tests/performance/cryptographic_hashes.xml index 7840a7b382a..71c37ed9f8d 100644 --- a/dbms/tests/performance/cryptographic_hashes.xml +++ b/dbms/tests/performance/cryptographic_hashes.xml @@ -1,12 +1,11 @@ - once + loop 10000 - 5000 20000 @@ -36,11 +35,11 @@ table - numbers - numbers_mt + numbers(1000000) + numbers_mt(10000000) - SELECT ignore({crypto_hash_func}({string})) FROM system.{table} + SELECT ignore({crypto_hash_func}({string})) FROM {table} From adb615405281b74471b46c9a61301ae57b67da9c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Feb 2020 19:56:34 +0300 Subject: [PATCH 082/215] Change endless performance tests (3) --- dbms/tests/performance/date_time.xml | 13 ++++++------- dbms/tests/performance/entropy.xml | 1 - dbms/tests/performance/float_formatting.xml | 5 ++--- dbms/tests/performance/float_parsing.xml | 5 ++--- dbms/tests/performance/format_date_time.xml | 5 ++--- dbms/tests/performance/functions_coding.xml | 11 +++++------ 6 files changed, 17 insertions(+), 23 deletions(-) diff --git a/dbms/tests/performance/date_time.xml b/dbms/tests/performance/date_time.xml index 77a6c634b34..e099b468560 100644 --- a/dbms/tests/performance/date_time.xml +++ b/dbms/tests/performance/date_time.xml @@ -1,6 +1,6 @@ - once + loop long @@ -8,7 +8,6 @@ - 1000 10000 @@ -133,11 +132,11 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) - SELECT count() FROM 
system.numbers WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) - SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) - SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) - \ No newline at end of file + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) +
diff --git a/dbms/tests/performance/entropy.xml b/dbms/tests/performance/entropy.xml index dcede345792..45c9ccb840d 100644 --- a/dbms/tests/performance/entropy.xml +++ b/dbms/tests/performance/entropy.xml @@ -10,7 +10,6 @@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/float_formatting.xml b/dbms/tests/performance/float_formatting.xml index 0216e524735..aaf2fad0c93 100644 --- a/dbms/tests/performance/float_formatting.xml +++ b/dbms/tests/performance/float_formatting.xml @@ -1,5 +1,5 @@ - once + loop long @@ -9,7 +9,6 @@ 10000 - 5000 20000 @@ -54,5 +53,5 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(toString({expr})) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toString({expr})) diff --git a/dbms/tests/performance/float_parsing.xml b/dbms/tests/performance/float_parsing.xml index 81f30540dd1..e7779751fa4 100644 --- a/dbms/tests/performance/float_parsing.xml +++ b/dbms/tests/performance/float_parsing.xml @@ -1,5 +1,5 @@ - once + loop long @@ -9,7 +9,6 @@ 10000 - 5000 20000 @@ -33,5 +32,5 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(toFloat64({expr})) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toFloat64({expr})) diff --git a/dbms/tests/performance/format_date_time.xml b/dbms/tests/performance/format_date_time.xml index 0ecdb37734d..aa070c40ec5 100644 --- a/dbms/tests/performance/format_date_time.xml +++ b/dbms/tests/performance/format_date_time.xml @@ -1,12 +1,11 @@ - once + loop - 1000 10000 @@ -25,5 +24,5 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, formatDateTime(t, '{format}')) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, formatDateTime(t, '{format}')) diff --git a/dbms/tests/performance/functions_coding.xml b/dbms/tests/performance/functions_coding.xml index 552f88be7fa..93e16a8a221 100644 --- 
a/dbms/tests/performance/functions_coding.xml +++ b/dbms/tests/performance/functions_coding.xml @@ -1,15 +1,14 @@ - once + loop - 5000 20000 - SELECT count() FROM system.numbers WHERE NOT ignore(MACNumToString(number)) - SELECT count() FROM system.numbers WHERE NOT ignore(MACStringToNum(MACNumToString(number))) - SELECT count() FROM system.numbers_mt WHERE NOT ignore(MACNumToString(rand64())) - SELECT count() FROM system.numbers_mt WHERE NOT ignore(MACStringToNum(MACNumToString(rand64()))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(MACNumToString(number)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(MACStringToNum(MACNumToString(number))) + SELECT count() FROM numbers_mt(10000000) WHERE NOT ignore(MACNumToString(rand64())) + SELECT count() FROM numbers_mt(10000000) WHERE NOT ignore(MACStringToNum(MACNumToString(rand64()))) From 420a7097689c17dd163b1ffa5541ccdfcf899a1f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Feb 2020 20:02:43 +0300 Subject: [PATCH 083/215] Change endless performance tests (4) --- dbms/tests/performance/functions_geo.xml | 15 +++++++-------- dbms/tests/performance/great_circle_dist.xml | 7 +++---- dbms/tests/performance/group_array_moving_sum.xml | 1 - dbms/tests/performance/h3.xml | 5 ++--- dbms/tests/performance/if_array_num.xml | 15 +++++++-------- dbms/tests/performance/if_array_string.xml | 15 +++++++-------- 6 files changed, 26 insertions(+), 32 deletions(-) diff --git a/dbms/tests/performance/functions_geo.xml b/dbms/tests/performance/functions_geo.xml index 1a13262d52f..a4233b2fe57 100644 --- a/dbms/tests/performance/functions_geo.xml +++ b/dbms/tests/performance/functions_geo.xml @@ -1,21 +1,20 @@ - once + loop - 1000 5000 - SELECT count() FROM system.numbers WHERE NOT ignore(geohashEncode((number % 150)*1.1 - 75, (number * 3.14 % 300)*1.1 - 150)) - SELECT count() FROM system.numbers WHERE NOT ignore(geohashDecode(toString(number % 1000000))) + SELECT count() FROM numbers(1000000) WHERE NOT 
ignore(geohashEncode((number % 150)*1.1 - 75, (number * 3.14 % 300)*1.1 - 150)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(geohashDecode(toString(number % 1000000))) - SELECT count() FROM system.numbers WHERE NOT ignore(geohashEncode(1.0/rand(), 2.0/rand())) - SELECT count() FROM system.numbers WHERE NOT ignore(geohashDecode(toString(rand() % 1000000))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(geohashEncode(1.0/rand(), 2.0/rand())) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(geohashDecode(toString(rand() % 1000000))) - SELECT count() FROM system.numbers WHERE NOT ignore(geohashEncode(number + 91.0, number + 181.0)) - SELECT count() FROM system.numbers WHERE NOT ignore(geohashDecode(hex(number))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(geohashEncode(number + 91.0, number + 181.0)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(geohashDecode(hex(number))) diff --git a/dbms/tests/performance/great_circle_dist.xml b/dbms/tests/performance/great_circle_dist.xml index 99382543d60..3edfc2c8008 100644 --- a/dbms/tests/performance/great_circle_dist.xml +++ b/dbms/tests/performance/great_circle_dist.xml @@ -1,16 +1,15 @@ - once + loop - 1000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. - 90)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(greatCircleDistance((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. - 90)) - SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance(55. + toFloat64(rand(1)) / 4294967296, 37. + toFloat64(rand(2)) / 4294967296, 55. + toFloat64(rand(3)) / 4294967296, 37. + toFloat64(rand(4)) / 4294967296)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(greatCircleDistance(55. + toFloat64(rand(1)) / 4294967296, 37. 
+ toFloat64(rand(2)) / 4294967296, 55. + toFloat64(rand(3)) / 4294967296, 37. + toFloat64(rand(4)) / 4294967296)) diff --git a/dbms/tests/performance/group_array_moving_sum.xml b/dbms/tests/performance/group_array_moving_sum.xml index 504a8b133a1..45878d43e0e 100644 --- a/dbms/tests/performance/group_array_moving_sum.xml +++ b/dbms/tests/performance/group_array_moving_sum.xml @@ -7,7 +7,6 @@ 30000 - 6000 60000 diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml index 7381f559a0f..3a6d5940d0d 100644 --- a/dbms/tests/performance/h3.xml +++ b/dbms/tests/performance/h3.xml @@ -1,14 +1,13 @@ - once + loop - 2000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) + SELECT count() FROM numbers(100000) WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) diff --git a/dbms/tests/performance/if_array_num.xml b/dbms/tests/performance/if_array_num.xml index 417b82a9d0c..375290e635c 100644 --- a/dbms/tests/performance/if_array_num.xml +++ b/dbms/tests/performance/if_array_num.xml @@ -1,18 +1,17 @@ - once + loop - 1000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [4, 5]) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([4, 5])) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([4, 5])) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [400, 500]) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([400, 500])) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([400, 500])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [4, 5]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? 
[1, 2, 3] : materialize([4, 5])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([4, 5])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [400, 500]) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([400, 500])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([400, 500])) diff --git a/dbms/tests/performance/if_array_string.xml b/dbms/tests/performance/if_array_string.xml index e1d8485adc2..1f14393ee16 100644 --- a/dbms/tests/performance/if_array_string.xml +++ b/dbms/tests/performance/if_array_string.xml @@ -1,18 +1,17 @@ - once + loop - 1000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c']) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c']) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c'])) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : materialize(['a', 'b', 'c'])) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['', '']) : emptyArrayString()) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? materialize(['https://github.com/ClickHouse/ClickHouse/pull/1070', 'https://www.google.ru/search?newwindow=1&site=&source=hp&q=zookeeper+wire+protocol+exists&oq=zookeeper+wire+protocol+exists&gs_l=psy-ab.3...330.6300.0.6687.33.28.0.0.0.0.386.4838.0j5j9j5.19.0....0...1.1.64.psy-ab..14.17.4448.0..0j35i39k1j0i131k1j0i22i30k1j0i19k1j33i21k1.r_3uFoNOrSU']) : emptyArrayString()) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c']) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? 
materialize(['Hello', 'World']) : ['a', 'b', 'c']) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c'])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : materialize(['a', 'b', 'c'])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['', '']) : emptyArrayString()) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['https://github.com/ClickHouse/ClickHouse/pull/1070', 'https://www.google.ru/search?newwindow=1&site=&source=hp&q=zookeeper+wire+protocol+exists&oq=zookeeper+wire+protocol+exists&gs_l=psy-ab.3...330.6300.0.6687.33.28.0.0.0.0.386.4838.0j5j9j5.19.0....0...1.1.64.psy-ab..14.17.4448.0..0j35i39k1j0i131k1j0i22i30k1j0i19k1j33i21k1.r_3uFoNOrSU']) : emptyArrayString()) From 72d2e56c0aea08f6fee4bfded59a453de8c26aa8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Feb 2020 22:44:15 +0300 Subject: [PATCH 084/215] fixup --- dbms/tests/performance/array_element.xml | 1 - dbms/tests/performance/array_join.xml | 1 - dbms/tests/performance/base64.xml | 1 - dbms/tests/performance/base64_hits.xml | 1 - dbms/tests/performance/bitCount.xml | 1 - 5 files changed, 5 deletions(-) diff --git a/dbms/tests/performance/array_element.xml b/dbms/tests/performance/array_element.xml index 2d05f9dcf51..1040c33ddbf 100644 --- a/dbms/tests/performance/array_element.xml +++ b/dbms/tests/performance/array_element.xml @@ -3,7 +3,6 @@ - 2000 10000 diff --git a/dbms/tests/performance/array_join.xml b/dbms/tests/performance/array_join.xml index c30039588e0..7574c5f8b15 100644 --- a/dbms/tests/performance/array_join.xml +++ b/dbms/tests/performance/array_join.xml @@ -3,7 +3,6 @@ - 5000 10000 diff --git a/dbms/tests/performance/base64.xml b/dbms/tests/performance/base64.xml index 3175c7811bf..8860ec232e8 100644 --- a/dbms/tests/performance/base64.xml +++ b/dbms/tests/performance/base64.xml @@ -6,7 +6,6 
@@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/base64_hits.xml b/dbms/tests/performance/base64_hits.xml index 63916dcee4e..5002edccaea 100644 --- a/dbms/tests/performance/base64_hits.xml +++ b/dbms/tests/performance/base64_hits.xml @@ -10,7 +10,6 @@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/bitCount.xml b/dbms/tests/performance/bitCount.xml index 8a58f501180..c714610f351 100644 --- a/dbms/tests/performance/bitCount.xml +++ b/dbms/tests/performance/bitCount.xml @@ -4,7 +4,6 @@ - 2000 10000 From bf9c663b489e6084b87d7efa28aacec9a0d98980 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Sat, 29 Feb 2020 01:28:17 +0300 Subject: [PATCH 085/215] add unit test for MergeTreeSetIndex --- .../tests/gtest_merge_tree_set_index.cpp | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp diff --git a/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp b/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp new file mode 100644 index 00000000000..709c3fc7e73 --- /dev/null +++ b/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include + +#include + +using namespace DB; + +TEST(MergeTreeSetIndex, checkInRange_one) +{ + DataTypes types = {std::make_shared()}; + + auto mut = types[0]->createColumn(); + mut->insert(1); + mut->insert(5); + mut->insert(7); + + Columns columns = {std::move(mut)}; + + std::vector mapping = {{0, 0, {}}}; + auto set = std::make_unique(columns, std::move(mapping)); + + // Left and right bounded + std::vector ranges = {Range(1, true, 4, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(1, 4)"; + + ranges = {Range(2, true, 4, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(2, 4)"; + + ranges = {Range(-1, true, 0, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(-1, 0)"; + + ranges = {Range(-1, true, 10, 
true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-1, 10)"; + + // Left bounded + ranges = {Range::createLeftBounded(1, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(1, +inf)"; + + ranges = {Range::createLeftBounded(-1, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-1, +inf)"; + + ranges = {Range::createLeftBounded(10, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(10, +inf)"; + + // Right bounded + ranges = {Range::createRightBounded(1, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-inf, 1)"; + + ranges = {Range::createRightBounded(-1, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(-inf, -1)"; + + ranges = {Range::createRightBounded(10, true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-inf, 10)"; +} + +TEST(MergeTreeSetIndex, checkInRange_tuple) +{ + DataTypes types = {std::make_shared(), std::make_shared()}; + + Columns columns; + { + auto values = {1, 1, 3, 3, 3, 10}; + auto mut = types[0]->createColumn(); + for (auto & val : values) + mut->insert(val); + columns.push_back(std::move(mut)); + } + + { + auto values = {"a", "b", "a", "a", "b", "c"}; + auto mut = types[1]->createColumn(); + for (auto & val : values) + mut->insert(val); + columns.push_back(std::move(mut)); + } + + std::vector mapping = {{0, 0, {}}, {1, 1, {}}}; + auto set = std::make_unique(columns, std::move(mapping)); + + std::vector ranges = {Range(1), Range("a", true, "c", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(1), Range('a', true, 'c', true)"; + + ranges = {Range(1, false, 3, false), Range()}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(1, false, 3, false), Range()"; + + ranges = {Range(2, false, 5, false), Range()}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(2, false, 5, false), 
Range()"; + + ranges = {Range(3), Range::createLeftBounded("a", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createLeftBounded('a', true)"; + + ranges = {Range(3), Range::createLeftBounded("f", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(3), Range::createLeftBounded('f', true)"; + + ranges = {Range(3), Range::createRightBounded("a", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createRightBounded('a', true)"; + + ranges = {Range(3), Range::createRightBounded("b", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createRightBounded('b', true)"; + + ranges = {Range(1), Range("b")}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(1), Range('b')"; + + ranges = {Range(1), Range("c")}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(1), Range('c')"; + + ranges = {Range(2, true, 3, true), Range()}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(2, true, 3, true), Range('x', true, 'z', true)"; + + ranges = {Range(2), Range("a", true, "z", true)}; + ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(2, true, 3, true), Range('c', true, 'z', true)"; +} From e21971cce4117ab829d1c4b542d79c49844f6593 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Sat, 29 Feb 2020 16:14:19 +0300 Subject: [PATCH 086/215] fix style check --- .../src/Interpreters/tests/gtest_merge_tree_set_index.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp b/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp index 709c3fc7e73..6cef78f7c70 100644 --- a/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp +++ b/dbms/src/Interpreters/tests/gtest_merge_tree_set_index.cpp @@ -10,14 +10,14 @@ using namespace DB; TEST(MergeTreeSetIndex, 
checkInRange_one) { DataTypes types = {std::make_shared()}; - + auto mut = types[0]->createColumn(); mut->insert(1); mut->insert(5); mut->insert(7); Columns columns = {std::move(mut)}; - + std::vector mapping = {{0, 0, {}}}; auto set = std::make_unique(columns, std::move(mapping)); @@ -43,7 +43,7 @@ TEST(MergeTreeSetIndex, checkInRange_one) ranges = {Range::createLeftBounded(10, true)}; ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(10, +inf)"; - + // Right bounded ranges = {Range::createRightBounded(1, true)}; ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-inf, 1)"; @@ -75,7 +75,7 @@ TEST(MergeTreeSetIndex, checkInRange_tuple) mut->insert(val); columns.push_back(std::move(mut)); } - + std::vector mapping = {{0, 0, {}}, {1, 1, {}}}; auto set = std::make_unique(columns, std::move(mapping)); From 01af9e379576fc01b52611ecf9e8cf7bd41c014b Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 2 Mar 2020 15:19:27 +0300 Subject: [PATCH 087/215] save changes --- dbms/src/Storages/StorageGenerate.cpp | 107 +++++++++++++++----------- dbms/src/Storages/StorageGenerate.h | 4 +- 2 files changed, 65 insertions(+), 46 deletions(-) diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp index d9e9440535b..e38a14fb3f6 100644 --- a/dbms/src/Storages/StorageGenerate.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -35,8 +35,9 @@ extern const int BAD_TYPE_OF_FIELD; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } + void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, - UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed) + UInt64 max_array_length, UInt64 max_string_length, pcg32& generator, pcg64_fast& generator64) { TypeIndex idx = type->getTypeId(); (void) max_string_length; @@ -49,7 +50,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast 
&>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -60,7 +60,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -71,7 +70,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -82,10 +80,9 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - UInt64 a = static_cast(generator()); + UInt64 a = static_cast(generator64()); data[i] = static_cast(a); } break; @@ -96,7 +93,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -107,7 +103,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -118,7 +113,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -129,11 +123,9 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = 
typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - Int64 a = static_cast(generator()); - data[i] = static_cast(a); + data[i] = static_cast(generator64()); } break; } @@ -143,7 +135,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); double d = 1.0; for (UInt64 i = 0; i < limit; ++i) { @@ -156,12 +147,11 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); double d = 1.0; for (UInt64 i = 0; i < limit; ++i) { d = std::numeric_limits::max(); - data[i] = (d / pcg64::max()) * generator(); + data[i] = (d / pcg64::max()) * generator64(); } break; } @@ -169,7 +159,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -180,7 +169,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -196,7 +184,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { UInt32 fractional = static_cast(generator()) % intExp10(scale); @@ -214,7 +201,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, UInt64 offset = 0; { - pcg32 generator(random_seed); 
offsets.resize(limit); for (UInt64 i = 0; i < limit; ++i) { @@ -256,7 +242,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, UInt64 num_chars = static_cast(len) * limit; { - pcg32 generator(random_seed); chars.resize(num_chars); for (UInt64 i = 0; i < num_chars; ++i) { @@ -270,7 +255,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, auto values = typeid_cast *>(type.get())->getValues(); auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); UInt8 size = values.size(); UInt8 off; @@ -286,7 +270,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, auto values = typeid_cast *>(type.get())->getValues(); auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); UInt16 size = values.size(); UInt8 off; @@ -301,7 +284,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg32 generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { data[i] = static_cast(generator()); @@ -312,7 +294,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); @@ -324,10 +305,9 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - Int128 x = static_cast(generator()) << 64 | static_cast(generator()); + Int128 x = static_cast(generator64()) << 64 | static_cast(generator64()); data[i] = x; } } @@ -336,11 +316,10 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, { 
auto & data = typeid_cast &>(column).getData(); data.resize(limit); - pcg64_fast generator(random_seed); for (UInt64 i = 0; i < limit; ++i) { - UInt64 a = static_cast(generator()); - UInt64 b = static_cast(generator()); + UInt64 a = static_cast(generator64()); + UInt64 b = static_cast(generator64()); auto x = UInt128(a, b); data[i] = x; } @@ -356,7 +335,6 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, UInt64 offset = 0; { - pcg32 generator(random_seed); offsets.resize(limit); for (UInt64 i = 0; i < limit; ++i) { @@ -364,7 +342,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, offsets[i] = offset; } } - fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, random_seed); + fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, generator, generator64); break; } case TypeIndex::Tuple: @@ -374,7 +352,7 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, for (size_t i = 0; i < column_tuple.tupleSize(); ++i) { - fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, random_seed); + fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, generator, generator64); } break; } @@ -390,9 +368,8 @@ void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, auto & null_map = column_nullable.getNullMapData(); IColumn & nested_column = column_nullable.getNestedColumn(); - fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, random_seed); + fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, generator, generator64); - pcg32 generator(random_seed); null_map.resize(limit); for (UInt64 i = 0; i < limit; ++i) { @@ -418,6 +395,52 @@ StorageGenerate::StorageGenerate(const StorageID & table_id_, const 
ColumnsDescr } +class GenerateSource : public SourceWithProgress +{ +public: + GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_) + : SourceWithProgress(block_header_), block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) + , block_header(block_header_), r32(random_seed_), r64(random_seed_) {} + + String getName() const override { return "Generate"; } + +protected: + Chunk generate() override + { + + for (auto & ctn : block_header.getColumnsWithTypeAndName()) + { + fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, block_size, max_array_length, max_string_length, r32, r64); + } + + auto column = ColumnUInt64::create(block_size); + ColumnUInt64::Container & vec = column->getData(); + + size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class. + UInt64 * pos = vec.data(); /// This also accelerates the code. 
+ UInt64 * end = &vec[block_size]; + while (pos < end) + *pos++ = curr++; + + next += step; + + progress({column->size(), column->byteSize()}); + + return { Columns {std::move(column)}, block_size }; + } + +private: + UInt64 block_size; + UInt64 max_array_length; + UInt64 max_string_length; + Block block_header; + + pcg32 r32; + pcg64 r64; + +}; + + void registerStorageGenerate(StorageFactory & factory) { factory.registerStorage("Generate", [](const StorageFactory::Arguments & args) @@ -453,28 +476,26 @@ Pipes StorageGenerate::read( const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*num_streams*/) + unsigned num_streams) { check(column_names, true); Pipes pipes; - const ColumnsDescription & columns_ = getColumns(); + pipes.reserve(num_streams); + const ColumnsDescription & columns_ = getColumns(); + Block block_header; for (const auto & name : column_names) { const auto & name_type = columns_.get(name); MutableColumnPtr column = name_type.type->createColumn(); - res_block.insert({std::move(column), name_type.type, name_type.name}); + block_header.insert({std::move(column), name_type.type, name_type.name}); } - for (auto & ctn : res_block.getColumnsWithTypeAndName()) + for (UInt64 i = 0; i < num_streams; ++i) { - fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, max_block_size, max_array_length, max_string_length, random_seed); + pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, random_seed + i, block_header)); } - - Chunk chunk(res_block.getColumns(), res_block.rows()); - pipes.emplace_back(std::make_shared(res_block.cloneEmpty(), std::move(chunk))); - return pipes; } diff --git a/dbms/src/Storages/StorageGenerate.h b/dbms/src/Storages/StorageGenerate.h index 6332b616c9a..4bb955bbabe 100644 --- a/dbms/src/Storages/StorageGenerate.h +++ b/dbms/src/Storages/StorageGenerate.h @@ -23,15 +23,13 @@ public: unsigned num_streams) override; private: - Block 
res_block; - UInt64 max_array_length = 10; UInt64 max_string_length = 10; UInt64 random_seed = 0; protected: StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, - UInt64 max_array_length, UInt64 max_string_length,UInt64 random_seed); + UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed); }; } From 5068346e59d7658aea4607285eccba25e670e54c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 18:08:35 +0300 Subject: [PATCH 088/215] fixup --- .../performance/group_array_moving_sum.xml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/tests/performance/group_array_moving_sum.xml b/dbms/tests/performance/group_array_moving_sum.xml index 45878d43e0e..fadd225e1a2 100644 --- a/dbms/tests/performance/group_array_moving_sum.xml +++ b/dbms/tests/performance/group_array_moving_sum.xml @@ -18,19 +18,19 @@ INSERT INTO moving_sum_1m SELECT number%100, rand() from numbers(1000000) INSERT INTO moving_sum_10m SELECT number%100, rand() from numbers(10000000) - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m GROUP BY k - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m GROUP BY k - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m GROUP BY k - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m GROUP BY k 
FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m GROUP BY k - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m GROUP BY k - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m GROUP BY k - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null DROP TABLE IF EXISTS moving_sum_10m DROP TABLE IF EXISTS moving_sum_1m From 7e542053fc01fc666e4ae77ad31467834e415bf7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 18:34:43 +0300 Subject: [PATCH 089/215] fixup --- dbms/tests/performance/consistent_hashes.xml | 4 +++- dbms/tests/performance/cryptographic_hashes.xml | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dbms/tests/performance/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes.xml index aee232ddce6..f3858e16a28 100644 --- a/dbms/tests/performance/consistent_hashes.xml +++ b/dbms/tests/performance/consistent_hashes.xml @@ -27,5 +27,7 @@ SELECT {hash_func}(number, {buckets}) FROM numbers(1000000) - SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM numbers(1000000) 
+ + + SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(1000000) diff --git a/dbms/tests/performance/cryptographic_hashes.xml b/dbms/tests/performance/cryptographic_hashes.xml index 71c37ed9f8d..7bafb25f299 100644 --- a/dbms/tests/performance/cryptographic_hashes.xml +++ b/dbms/tests/performance/cryptographic_hashes.xml @@ -29,6 +29,11 @@ materialize('') toString(1000000000+number) + + + + string_slow + materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sollicitudin nisi ac erat mollis dapibus. Maecenas leo purus, bibendum eu erat eget, iaculis molestie tortor. Phasellus maximus odio nec mauris ultrices dictum. Morbi efficitur nisl eget congue mollis. Vestibulum pharetra diam vitae urna interdum, eget ultricies justo sollicitudin. Nunc sit amet purus id leo tempus dignissim. Donec ac lacus ut orci tempus scelerisque quis ultricies nibh. Nullam lobortis, erat ac ullamcorper interdum, odio nisl elementum quam, ut malesuada massa nunc eget quam. Nam suscipit neque quis sapien ultricies imperdiet. Maecenas augue libero, finibus tristique sagittis et, semper nec arcu. Morbi non tortor ultrices, sollicitudin justo sed, accumsan ligula. Nullam at ipsum in nibh auctor ullamcorper. Nullam laoreet neque id lorem condimentum tincidunt. Nullam vel orci nibh. Ut sit amet sem faucibus, fringilla orci at, lacinia enim. Mauris imperdiet ex id scelerisque eleifend. 
Ut tincidunt massa nibh, viverra pharetra metus') @@ -39,7 +44,15 @@ numbers_mt(10000000) + + table_slow + + numbers(100000) + numbers_mt(1000000) + + - SELECT ignore({crypto_hash_func}({string})) FROM {table} + SELECT ignore({crypto_hash_func}({string})) FROM {table} FORMAT Null + SELECT ignore({crypto_hash_func}({string_slow})) FROM {table_slow} FORMAT Null From 42d608317223a87d7b6459c91868fc4fc0a66c30 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 18:37:44 +0300 Subject: [PATCH 090/215] fixup --- dbms/tests/performance/base64.xml | 4 ++-- dbms/tests/performance/bitCount.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/performance/base64.xml b/dbms/tests/performance/base64.xml index 8860ec232e8..dbf8e0dc981 100644 --- a/dbms/tests/performance/base64.xml +++ b/dbms/tests/performance/base64.xml @@ -23,8 +23,8 @@ table - numbers(1000000) - numbers_mt(10000000) + numbers(10000000) + numbers_mt(100000000) diff --git a/dbms/tests/performance/bitCount.xml b/dbms/tests/performance/bitCount.xml index c714610f351..60901885dbd 100644 --- a/dbms/tests/performance/bitCount.xml +++ b/dbms/tests/performance/bitCount.xml @@ -22,5 +22,5 @@ - SELECT bitCount({expr}) FROM numbers(1000000) + SELECT bitCount({expr}) FROM numbers(1000000) FORMAT Null From 0eb47b3415e355add689e56b28478bc6f9490765 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 19:50:18 +0300 Subject: [PATCH 091/215] Change endless performance tests (5) --- dbms/tests/performance/if_string_const.xml | 12 +++++------- dbms/tests/performance/if_to_multiif.xml | 13 ++++++------- dbms/tests/performance/information_value.xml | 7 +++---- dbms/tests/performance/linear_regression.xml | 1 - .../performance/random_printable_ascii.xml | 17 ++++++++--------- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/dbms/tests/performance/if_string_const.xml b/dbms/tests/performance/if_string_const.xml index 15a281685ae..5b06440473f 100644 
--- a/dbms/tests/performance/if_string_const.xml +++ b/dbms/tests/performance/if_string_const.xml @@ -1,16 +1,14 @@ - once + loop - 1000 10000 - - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? 'hello' : 'world') - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? 'hello' : '') - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? toFixedString('hello', 5) : toFixedString('world', 5)) - SELECT count() FROM system.numbers WHERE NOT ignore(rand() % 2 ? '' : toFixedString('world', 5)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? 'hello' : 'world') + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? 'hello' : '') + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? toFixedString('hello', 5) : toFixedString('world', 5)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? '' : toFixedString('world', 5)) diff --git a/dbms/tests/performance/if_to_multiif.xml b/dbms/tests/performance/if_to_multiif.xml index 54d4b8ba842..7dee667a1bd 100644 --- a/dbms/tests/performance/if_to_multiif.xml +++ b/dbms/tests/performance/if_to_multiif.xml @@ -1,19 +1,18 @@ - once + loop - 1000 10000 - - - - + + + + diff --git a/dbms/tests/performance/information_value.xml b/dbms/tests/performance/information_value.xml index ed054eda40d..f5b73a18abc 100644 --- a/dbms/tests/performance/information_value.xml +++ b/dbms/tests/performance/information_value.xml @@ -2,7 +2,7 @@ loop - test.hits + hits_100m_single @@ -10,12 +10,11 @@ 10000 - 5000 20000 - SELECT categoricalInformationValue(Age < 15, IsMobile) - SELECT categoricalInformationValue(Age < 15, Age >= 15 and Age < 30, Age >= 30 and Age < 45, Age >= 45 and Age < 60, Age >= 60, IsMobile) + SELECT categoricalInformationValue(Age < 15, IsMobile) from hits_100m_single + SELECT categoricalInformationValue(Age < 15, Age >= 15 and Age < 30, Age >= 30 and Age < 45, Age >= 45 and Age < 60, Age >= 60, IsMobile) from hits_100m_single diff 
--git a/dbms/tests/performance/linear_regression.xml b/dbms/tests/performance/linear_regression.xml index 50634b6a60a..0b4892f71ec 100644 --- a/dbms/tests/performance/linear_regression.xml +++ b/dbms/tests/performance/linear_regression.xml @@ -3,7 +3,6 @@ - 3000 10000 diff --git a/dbms/tests/performance/random_printable_ascii.xml b/dbms/tests/performance/random_printable_ascii.xml index b37469c0aee..5fca705464e 100644 --- a/dbms/tests/performance/random_printable_ascii.xml +++ b/dbms/tests/performance/random_printable_ascii.xml @@ -1,19 +1,18 @@ - once + loop - 4000 10000 - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(10)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(100)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(1000)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(10000)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(rand() % 10)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(rand() % 100)) - SELECT count() FROM system.numbers WHERE NOT ignore(randomPrintableASCII(rand() % 1000)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(10)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(100)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(1000)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(10000)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(rand() % 10)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(rand() % 100)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(randomPrintableASCII(rand() % 1000)) From 55a08a278400fcc4816c74ec27a9d961006ab58c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 20:05:41 +0300 Subject: [PATCH 092/215] Change endless performance tests (6) --- 
dbms/tests/performance/range.xml | 7 ++-- dbms/tests/performance/read_hits_with_aio.xml | 12 +++---- dbms/tests/performance/right.xml | 1 - dbms/tests/performance/round_down.xml | 13 ++++---- dbms/tests/performance/round_methods.xml | 33 +++++++++---------- dbms/tests/performance/set.xml | 7 ++-- dbms/tests/performance/simple_join_query.xml | 1 - 7 files changed, 34 insertions(+), 40 deletions(-) diff --git a/dbms/tests/performance/range.xml b/dbms/tests/performance/range.xml index 48463b535ef..ee61a22b0cf 100644 --- a/dbms/tests/performance/range.xml +++ b/dbms/tests/performance/range.xml @@ -1,14 +1,13 @@ - once + loop - 5000 10000 - SELECT count() FROM (SELECT range(number % 100) FROM system.numbers limit 10000000) - SELECT count() FROM (SELECT range(0, number % 100, 1) FROM system.numbers limit 10000000) + SELECT range(number % 100) FROM numbers(10000000) FORMAT Null + SELECT range(0, number % 100, 1) FROM numbers(10000000) FORMAT Null diff --git a/dbms/tests/performance/read_hits_with_aio.xml b/dbms/tests/performance/read_hits_with_aio.xml index 573da5a4c70..9b70dfaa421 100644 --- a/dbms/tests/performance/read_hits_with_aio.xml +++ b/dbms/tests/performance/read_hits_with_aio.xml @@ -1,20 +1,20 @@ + loop + - 5000 30000 - once - hits_1000m_single + hits_100m_single -SELECT count() FROM hits_1000m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; +SELECT count() FROM hits_100m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; SELECT count() FROM hits_1000m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; -SELECT count() FROM hits_1000m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; -SELECT count() FROM 
hits_1000m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; +SELECT count() FROM hits_100m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; +SELECT count() FROM hits_100m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; diff --git a/dbms/tests/performance/right.xml b/dbms/tests/performance/right.xml index 06d4bdaa93f..55095d251f7 100644 --- a/dbms/tests/performance/right.xml +++ b/dbms/tests/performance/right.xml @@ -10,7 +10,6 @@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/round_down.xml b/dbms/tests/performance/round_down.xml index 5275d69ad84..b14b5a9fb2a 100644 --- a/dbms/tests/performance/round_down.xml +++ b/dbms/tests/performance/round_down.xml @@ -1,20 +1,19 @@ - once + loop 10000 - 5000 20000 - SELECT count() FROM system.numbers WHERE NOT ignore(roundDuration(rand() % 65536)) - SELECT count() FROM system.numbers WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) - SELECT count() FROM system.numbers WHERE NOT ignore(roundAge(rand() % 100)) - SELECT count() FROM system.numbers WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) - SELECT count() FROM system.numbers WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDuration(rand() % 65536)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundAge(rand() % 100)) + SELECT count() FROM numbers(1000000) 
WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) diff --git a/dbms/tests/performance/round_methods.xml b/dbms/tests/performance/round_methods.xml index b80a8977c33..54bd1e4af17 100644 --- a/dbms/tests/performance/round_methods.xml +++ b/dbms/tests/performance/round_methods.xml @@ -1,32 +1,31 @@ - once + loop 10000 - 5000 20000 - SELECT count() FROM system.numbers WHERE NOT ignore(round(toInt64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(roundBankers(toInt64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(floor(toInt64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(ceil(toInt64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(trunc(toInt64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toInt64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toInt64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(floor(toInt64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toInt64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toInt64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(round(toFloat64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(roundBankers(toFloat64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(floor(toFloat64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(ceil(toFloat64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(trunc(toFloat64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toFloat64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toFloat64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT 
ignore(floor(toFloat64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toFloat64(number), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toFloat64(number), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(round(toDecimal128(number, 0), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(roundBankers(toDecimal128(number, 0), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(floor(toDecimal128(number, 0), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(ceil(toDecimal128(number, 0), -2)) - SELECT count() FROM system.numbers WHERE NOT ignore(trunc(toDecimal128(number, 0), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toDecimal128(number, 0), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toDecimal128(number, 0), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(floor(toDecimal128(number, 0), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toDecimal128(number, 0), -2)) + SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toDecimal128(number, 0), -2)) diff --git a/dbms/tests/performance/set.xml b/dbms/tests/performance/set.xml index 7f3ee4fd4c1..c142730a560 100644 --- a/dbms/tests/performance/set.xml +++ b/dbms/tests/performance/set.xml @@ -1,5 +1,5 @@ - once + loop long @@ -9,7 +9,6 @@ 10000 - 5000 20000 @@ -19,8 +18,8 @@ table - system.numbers - system.numbers_mt + numbers(1000000) + numbers_mt(10000000) diff --git a/dbms/tests/performance/simple_join_query.xml b/dbms/tests/performance/simple_join_query.xml index 1f6d6ba74d6..c13dd70a777 100644 --- a/dbms/tests/performance/simple_join_query.xml +++ b/dbms/tests/performance/simple_join_query.xml @@ -6,7 +6,6 @@ 30000 - 6000 60000 From b9a52f18f1cf115f95f9d4b421146ac1c2802a92 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 2 Mar 2020 20:13:49 +0300 Subject: [PATCH 093/215] fixup --- dbms/tests/performance/date_time.xml | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/performance/date_time.xml b/dbms/tests/performance/date_time.xml index e099b468560..4e9cd2c4abd 100644 --- a/dbms/tests/performance/date_time.xml +++ b/dbms/tests/performance/date_time.xml @@ -132,11 +132,11 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(100000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) From 3f820595367f8dd36a80e9329128527c25ca1a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Rodriguez?= Date: Mon, 2 Mar 2020 18:21:46 +0100 Subject: [PATCH 094/215] Fixed some encoding style issues --- base/mysqlxx/include/mysqlxx/PoolFactory.h | 106 ++++----- base/mysqlxx/src/Pool.cpp | 5 +- base/mysqlxx/src/PoolFactory.cpp | 244 ++++++++++----------- 3 files changed, 180 insertions(+), 175 deletions(-) diff --git 
a/base/mysqlxx/include/mysqlxx/PoolFactory.h b/base/mysqlxx/include/mysqlxx/PoolFactory.h index 3c553b8b6da..a21c2baca2f 100644 --- a/base/mysqlxx/include/mysqlxx/PoolFactory.h +++ b/base/mysqlxx/include/mysqlxx/PoolFactory.h @@ -1,51 +1,55 @@ -#pragma once - -#include -#include -#include -#include "PoolWithFailover.h" - -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 -#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 - -namespace mysqlxx -{ -/* - * PoolFactory.h - * This class is a helper singleton to mutualize connections to MySQL. - */ -class PoolFactory final : private boost::noncopyable -{ -public: - static PoolFactory & instance(); - - PoolFactory(const PoolFactory &) = delete; - - /** Allocates a PoolWithFailover to connect to MySQL. */ - PoolWithFailover Get(const std::string & config_name, - unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); - - /** Allocates a PoolWithFailover to connect to MySQL. 
*/ - PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name, - unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); - - void reset(); - - - ~PoolFactory() = default; - PoolFactory& operator=(const PoolFactory &) = delete; - -private: - PoolFactory(); - - struct Impl; - std::unique_ptr impl; -}; - -} +#pragma once + +#include +#include +#include + +#include + + +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 + + +namespace mysqlxx +{ + +/* + * PoolFactory.h + * This class is a helper singleton to mutualize connections to MySQL. + */ +class PoolFactory final : private boost::noncopyable +{ +public: + static PoolFactory & instance(); + + PoolFactory(const PoolFactory &) = delete; + + /** Allocates a PoolWithFailover to connect to MySQL. */ + PoolWithFailover Get(const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + /** Allocates a PoolWithFailover to connect to MySQL. 
*/ + PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + void reset(); + + + ~PoolFactory() = default; + PoolFactory& operator=(const PoolFactory &) = delete; + +private: + PoolFactory(); + + struct Impl; + std::unique_ptr impl; +}; + +} diff --git a/base/mysqlxx/src/Pool.cpp b/base/mysqlxx/src/Pool.cpp index 10c599316b2..280c0e06276 100644 --- a/base/mysqlxx/src/Pool.cpp +++ b/base/mysqlxx/src/Pool.cpp @@ -22,7 +22,7 @@ void Pool::Entry::incrementRefCount() if (!data) return; ++(data->ref_count); - if(data->ref_count==1) + if (data->ref_count==1) mysql_thread_init(); } @@ -30,7 +30,8 @@ void Pool::Entry::decrementRefCount() { if (!data) return; - if (data->ref_count > 0) { + if (data->ref_count > 0) + { --(data->ref_count); if (data->ref_count==0) mysql_thread_end(); diff --git a/base/mysqlxx/src/PoolFactory.cpp b/base/mysqlxx/src/PoolFactory.cpp index 41e5944ebdc..8f07e22671f 100644 --- a/base/mysqlxx/src/PoolFactory.cpp +++ b/base/mysqlxx/src/PoolFactory.cpp @@ -1,122 +1,122 @@ -#include -#include -#include - -namespace mysqlxx -{ - -struct PoolFactory::Impl -{ - // Cache of already affected pools identified by their config name - std::map> pools; - - // Cache of Pool ID (host + port + user +...) cibling already established shareable pool - std::map pools_by_ids; - - /// Protect pools and pools_by_ids caches - std::mutex mutex; -}; - -PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections, - unsigned max_connections, size_t max_tries) -{ - return Get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries); -} - -/// Duplicate of code from StringUtils.h. 
Copied here for less dependencies. -static bool startsWith(const std::string & s, const char * prefix) -{ - return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); -} - -static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name) -{ - bool shared = config.getBool(config_name + ".share_connection", false); - - // Not shared no need to generate a name the pool won't be stored - if (!shared) - return ""; - - std::string entry_name = ""; - std::string host = config.getString(config_name + ".host", ""); - std::string port = config.getString(config_name + ".port", ""); - std::string user = config.getString(config_name + ".user", ""); - std::string db = config.getString(config_name + ".db", ""); - std::string table = config.getString(config_name + ".table", ""); - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_name, keys); - - if (config.has(config_name + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys replica_keys; - config.keys(config_name, replica_keys); - for (const auto & replica_config_key : replica_keys) - { - /// There could be another elements in the same level in configuration file, like "user", "port"... - if (startsWith(replica_config_key, "replica")) - { - std::string replica_name = config_name + "." + replica_config_key; - std::string tmp_host = config.getString(replica_name + ".host", host); - std::string tmp_port = config.getString(replica_name + ".port", port); - std::string tmp_user = config.getString(replica_name + ".user", user); - entry_name += (entry_name.empty() ? 
"" : "|") + tmp_user + "@" + tmp_host + ":" + tmp_port + "/" + db; - } - } - } - else - { - entry_name = user + "@" + host + ":" + port + "/" + db; - } - return entry_name; -} - -PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config, - const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries) -{ - - std::lock_guard lock(impl->mutex); - if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) - { - return *(entry->second.get()); - } - else - { - std::string entry_name = getPoolEntryName(config, config_name); - if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) - { - entry = impl->pools.find(id->second); - std::shared_ptr pool = entry->second; - impl->pools.insert_or_assign(config_name, pool); - return *pool; - } - - auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); - // Check the pool will be shared - if (!entry_name.empty()) - { - // Store shared pool - impl->pools.insert_or_assign(config_name, pool); - impl->pools_by_ids.insert_or_assign(entry_name, config_name); - } - return *(pool.get()); - } -} - -void PoolFactory::reset() -{ - std::lock_guard lock(impl->mutex); - impl->pools.clear(); - impl->pools_by_ids.clear(); -} - -PoolFactory::PoolFactory() : impl(std::make_unique()) {} - -PoolFactory & PoolFactory::instance() -{ - static PoolFactory ret; - return ret; -} - -} +#include +#include +#include + +namespace mysqlxx +{ + +struct PoolFactory::Impl +{ + // Cache of already affected pools identified by their config name + std::map> pools; + + // Cache of Pool ID (host + port + user +...) 
cibling already established shareable pool + std::map pools_by_ids; + + /// Protect pools and pools_by_ids caches + std::mutex mutex; +}; + +PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections, + unsigned max_connections, size_t max_tries) +{ + return Get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries); +} + +/// Duplicate of code from StringUtils.h. Copied here for less dependencies. +static bool startsWith(const std::string & s, const char * prefix) +{ + return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); +} + +static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name) +{ + bool shared = config.getBool(config_name + ".share_connection", false); + + // Not shared no need to generate a name the pool won't be stored + if (!shared) + return ""; + + std::string entry_name = ""; + std::string host = config.getString(config_name + ".host", ""); + std::string port = config.getString(config_name + ".port", ""); + std::string user = config.getString(config_name + ".user", ""); + std::string db = config.getString(config_name + ".db", ""); + std::string table = config.getString(config_name + ".table", ""); + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + if (config.has(config_name + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys replica_keys; + config.keys(config_name, replica_keys); + for (const auto & replica_config_key : replica_keys) + { + /// There could be another elements in the same level in configuration file, like "user", "port"... + if (startsWith(replica_config_key, "replica")) + { + std::string replica_name = config_name + "." 
+ replica_config_key; + std::string tmp_host = config.getString(replica_name + ".host", host); + std::string tmp_port = config.getString(replica_name + ".port", port); + std::string tmp_user = config.getString(replica_name + ".user", user); + entry_name += (entry_name.empty() ? "" : "|") + tmp_user + "@" + tmp_host + ":" + tmp_port + "/" + db; + } + } + } + else + { + entry_name = user + "@" + host + ":" + port + "/" + db; + } + return entry_name; +} + +PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries) +{ + + std::lock_guard lock(impl->mutex); + if (auto entry = impl->pools.find(config_name); entry != impl->pools.end()) + { + return *(entry->second.get()); + } + else + { + std::string entry_name = getPoolEntryName(config, config_name); + if (auto id = impl->pools_by_ids.find(entry_name); id != impl->pools_by_ids.end()) + { + entry = impl->pools.find(id->second); + std::shared_ptr pool = entry->second; + impl->pools.insert_or_assign(config_name, pool); + return *pool; + } + + auto pool = std::make_shared(config, config_name, default_connections, max_connections, max_tries); + // Check the pool will be shared + if (!entry_name.empty()) + { + // Store shared pool + impl->pools.insert_or_assign(config_name, pool); + impl->pools_by_ids.insert_or_assign(entry_name, config_name); + } + return *(pool.get()); + } +} + +void PoolFactory::reset() +{ + std::lock_guard lock(impl->mutex); + impl->pools.clear(); + impl->pools_by_ids.clear(); +} + +PoolFactory::PoolFactory() : impl(std::make_unique()) {} + +PoolFactory & PoolFactory::instance() +{ + static PoolFactory ret; + return ret; +} + +} From e95ce301f5b1ed73895928e7c259c682d6f4af05 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Mar 2020 23:01:57 +0300 Subject: [PATCH 095/215] Update base64_hits.xml --- 
dbms/tests/performance/base64_hits.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/performance/base64_hits.xml b/dbms/tests/performance/base64_hits.xml index 5002edccaea..edf4321fa07 100644 --- a/dbms/tests/performance/base64_hits.xml +++ b/dbms/tests/performance/base64_hits.xml @@ -2,7 +2,7 @@ once - hits_10m_single + hits_100m_single @@ -27,7 +27,7 @@ - SELECT count() FROM hits_10m_single WHERE NOT ignore(base64Encode({string})) - SELECT count() FROM hits_10m_single WHERE base64Decode(base64Encode({string})) != {string} - SELECT count() FROM hits_10m_single WHERE tryBase64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_100m_single WHERE NOT ignore(base64Encode({string})) + SELECT count() FROM hits_100m_single WHERE base64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_100m_single WHERE tryBase64Decode(base64Encode({string})) != {string} From 634ca1feec395a0f8bfdf63ee5fd60d842d006e0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Mar 2020 23:02:55 +0300 Subject: [PATCH 096/215] Update array_join.xml --- dbms/tests/performance/array_join.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/tests/performance/array_join.xml b/dbms/tests/performance/array_join.xml index 7574c5f8b15..e5025695d15 100644 --- a/dbms/tests/performance/array_join.xml +++ b/dbms/tests/performance/array_join.xml @@ -8,10 +8,10 @@ - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - SELECT count() FROM (SELECT [number] a, [number * 2] b FROM 
numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(1000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 - SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(1000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(10000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(10000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(10000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2] b FROM numbers(10000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(10000000)) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 + SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM numbers(10000000)) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1 From 3b54c5eae794b2494624a4b812d7d91dd5c32537 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Mar 2020 23:36:44 +0300 Subject: [PATCH 097/215] Update group_array_moving_sum.xml --- .../performance/group_array_moving_sum.xml | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/tests/performance/group_array_moving_sum.xml b/dbms/tests/performance/group_array_moving_sum.xml index fadd225e1a2..d5a0030b623 100644 --- a/dbms/tests/performance/group_array_moving_sum.xml +++ 
b/dbms/tests/performance/group_array_moving_sum.xml @@ -12,26 +12,26 @@ - CREATE TABLE moving_sum_1m(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k CREATE TABLE moving_sum_10m(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k + CREATE TABLE moving_sum_100m(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k - INSERT INTO moving_sum_1m SELECT number%100, rand() from numbers(1000000) INSERT INTO moving_sum_10m SELECT number%100, rand() from numbers(10000000) + INSERT INTO moving_sum_100m SELECT number%100, rand() from numbers(100000000) - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM 
moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + DROP TABLE IF EXISTS moving_sum_100m DROP TABLE IF EXISTS moving_sum_10m - DROP TABLE IF EXISTS moving_sum_1m From 493354968bc3efc032c135ff1deb1d6e40e63693 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 2 Mar 2020 23:40:17 +0300 Subject: [PATCH 098/215] Update consistent_hashes.xml --- dbms/tests/performance/consistent_hashes.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/performance/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes.xml index f3858e16a28..5929c6388d5 100644 --- a/dbms/tests/performance/consistent_hashes.xml +++ b/dbms/tests/performance/consistent_hashes.xml @@ -26,8 +26,8 @@ - SELECT {hash_func}(number, {buckets}) FROM numbers(1000000) + SELECT {hash_func}(number, {buckets}) FROM numbers(1000000) FORMAT Null - SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(1000000) + SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(1000000) FORMAT Null From 423c7e8a8668d2c7be9bc11e0992220919a04bca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Rodriguez?= Date: Tue, 3 Mar 2020 09:32:58 +0100 Subject: [PATCH 
099/215] Fix compilation issue in Clang --- dbms/src/Interpreters/ExternalDictionariesLoader.cpp | 1 + dbms/src/Interpreters/InterpreterSystemQuery.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index c53537b80cd..02388028c73 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -1,5 +1,6 @@ #include #include +#include "config_core.h" #if USE_MYSQL # include diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 7c5043154af..c6bca5bda2f 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -215,6 +215,7 @@ BlockIO InterpreterSystemQuery::execute() case Type::RELOAD_DICTIONARY: context.checkAccess(AccessType::RELOAD_DICTIONARY); system_context.getExternalDictionariesLoader().loadOrReload(query.target_dictionary); + ExternalDictionariesLoader::resetAll(); break; case Type::RELOAD_DICTIONARIES: context.checkAccess(AccessType::RELOAD_DICTIONARY); @@ -222,6 +223,7 @@ BlockIO InterpreterSystemQuery::execute() [&] () { system_context.getExternalDictionariesLoader().reloadAllTriedToLoad(); }, [&] () { system_context.getEmbeddedDictionaries().reload(); } ); + ExternalDictionariesLoader::resetAll(); break; case Type::RELOAD_EMBEDDED_DICTIONARIES: context.checkAccess(AccessType::RELOAD_DICTIONARY); From 1aac3a5c9dff467edbec8a1faea9dc45cbf26035 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 12:05:17 +0300 Subject: [PATCH 100/215] Fixing defaults --- .../Interpreters/InterpreterCreateQuery.cpp | 10 +------- dbms/src/Storages/ColumnsDescription.cpp | 24 ++++++++++++++++++- dbms/src/Storages/ColumnsDescription.h | 1 + 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp 
b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 4f7f1f98fbd..2aa2810dbc9 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -315,15 +315,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres Block defaults_sample_block; /// set missing types and wrap default_expression's in a conversion-function if necessary if (!default_expr_list->children.empty()) - { - auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, column_names_and_types); - const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); - for (auto & action : actions->getActions()) - if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN) - throw Exception("Cannot CREATE table. Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); - - defaults_sample_block = actions->getSampleBlock(); - } + defaults_sample_block = validateColumnsDefaultsAndGetSampleBlock(default_expr_list, column_names_and_types, context); ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 8b491679138..b923e9e255e 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -20,7 +22,9 @@ #include #include #include - +#include +#include +#include namespace DB { @@ -30,6 +34,7 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int ILLEGAL_COLUMN; extern const int CANNOT_PARSE_TEXT; + extern const int THERE_IS_NO_DEFAULT_VALUE; } ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) @@ -421,4 +426,21 @@ ColumnsDescription 
ColumnsDescription::parse(const String & str) return result; } + +Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context) +{ + for (const auto & child : default_expr_list->children) + if (child->as() || child->as()) + throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + + auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns); + const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); + for (auto & action : actions->getActions()) + if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN) + throw Exception( + "Unsupported default value that requires ARRAY JOIN or JOIN action", + ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + + return actions->getSampleBlock(); +} } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index d0d042498fa..254be091b6b 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -114,4 +114,5 @@ private: Container columns; }; +Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context); } From e78ceebf6ee317308f952ba98b70c0e4c92c92ef Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 13:00:51 +0300 Subject: [PATCH 101/215] fixup --- dbms/tests/performance/read_hits_with_aio.xml | 2 +- dbms/tests/performance/round_down.xml | 10 +++++----- dbms/tests/performance/set.xml | 4 ++-- dbms/tests/performance/simple_join_query.xml | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/tests/performance/read_hits_with_aio.xml b/dbms/tests/performance/read_hits_with_aio.xml index 9b70dfaa421..5fa3f70ed86 100644 --- a/dbms/tests/performance/read_hits_with_aio.xml +++ 
b/dbms/tests/performance/read_hits_with_aio.xml @@ -12,7 +12,7 @@ SELECT count() FROM hits_100m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; -SELECT count() FROM hits_1000m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; +SELECT count() FROM hits_100m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 1, max_read_buffer_size = 10485760; SELECT count() FROM hits_100m_single where UserID=1234567890 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; SELECT count() FROM hits_100m_single where EventDate between toDate('2013-07-10') and toDate('2013-07-16') and UserID=123 SETTINGS max_threads = 1, min_bytes_to_use_direct_io = 0, max_read_buffer_size = 10485760; diff --git a/dbms/tests/performance/round_down.xml b/dbms/tests/performance/round_down.xml index b14b5a9fb2a..880b625af28 100644 --- a/dbms/tests/performance/round_down.xml +++ b/dbms/tests/performance/round_down.xml @@ -11,9 +11,9 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDuration(rand() % 65536)) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundAge(rand() % 100)) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDuration(rand() % 65536)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 
10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundAge(rand() % 100)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) diff --git a/dbms/tests/performance/set.xml b/dbms/tests/performance/set.xml index c142730a560..75b87d38abe 100644 --- a/dbms/tests/performance/set.xml +++ b/dbms/tests/performance/set.xml @@ -18,8 +18,8 @@ table - numbers(1000000) - numbers_mt(10000000) + numbers(10000000) + numbers_mt(100000000) diff --git a/dbms/tests/performance/simple_join_query.xml b/dbms/tests/performance/simple_join_query.xml index c13dd70a777..919cce33be6 100644 --- a/dbms/tests/performance/simple_join_query.xml +++ b/dbms/tests/performance/simple_join_query.xml @@ -11,12 +11,12 @@ - CREATE TABLE join_table(A Int64, S0 String, S1 String, S2 String, S3 String)ENGINE = MergeTree ORDER BY A + CREATE TABLE join_table(A Int64, S0 String, S1 String, S2 String, S3 String) ENGINE = MergeTree ORDER BY A INSERT INTO join_table SELECT number AS A, toString(arrayMap(x->x, range(100))) S0, S0 AS S1, S0 AS S2, S0 AS S3 from numbers(500000) SELECT COUNT() FROM join_table LEFT JOIN join_table USING A - SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) USING A + SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) right USING A SELECT COUNT() FROM join_table AS left LEFT JOIN join_table AS right ON left.A = right.A SELECT COUNT() FROM join_table AS left LEFT JOIN (SELECT A FROM join_table) AS right ON left.A = right.A From a9edc5c3a4e53ab9fa1e1e9a9d483ba0ba1ded79 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 13:02:43 +0300 Subject: [PATCH 102/215] Restrict select queries in default expressions. Unify default checks in ALTER and CREATE queries. 
--- dbms/src/Storages/AlterCommands.cpp | 87 ++++++++++--------- dbms/src/Storages/AlterCommands.h | 10 --- dbms/src/Storages/ColumnsDescription.cpp | 23 +++-- dbms/src/Storages/ColumnsDescription.h | 4 + .../00079_defaulted_columns.reference | 1 - .../0_stateless/00079_defaulted_columns.sql | 5 +- 6 files changed, 66 insertions(+), 64 deletions(-) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index cebc4a4b2ba..84e50c576c3 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -541,6 +542,8 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Context & context) const { auto all_columns = metadata.columns; + /// Default expression for all added/modified. + ASTPtr default_expr_list = std::make_shared(); for (size_t i = 0; i < size(); ++i) { auto & command = (*this)[i]; @@ -561,9 +564,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con throw Exception{"Data type have to be specified for column " + backQuote(column_name) + " to add", ErrorCodes::BAD_ARGUMENTS}; - if (command.default_expression) - validateDefaultExpressionForColumn(command.default_expression, column_name, command.data_type, all_columns, context); - all_columns.add(ColumnDescription(column_name, command.data_type, false)); } else if (command.type == AlterCommand::MODIFY_COLUMN) @@ -576,22 +576,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con else continue; } - - auto column_in_table = metadata.columns.get(column_name); - if (command.default_expression) - { - if (!command.data_type) - validateDefaultExpressionForColumn( - command.default_expression, column_name, column_in_table.type, all_columns, context); - else - validateDefaultExpressionForColumn( - command.default_expression, 
column_name, command.data_type, all_columns, context); - } - else if (column_in_table.default_desc.expression && command.data_type) - { - validateDefaultExpressionForColumn( - column_in_table.default_desc.expression, column_name, command.data_type, all_columns, context); - } } else if (command.type == AlterCommand::DROP_COLUMN) { @@ -633,31 +617,52 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con if (metadata.settings_ast == nullptr) throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS}; } - } -} -void AlterCommands::validateDefaultExpressionForColumn( - const ASTPtr default_expression, - const String & column_name, - const DataTypePtr column_type, - const ColumnsDescription & all_columns, - const Context & context) const -{ + /// Collect default expressions for MODIFY and ADD comands + if (command.type == AlterCommand::MODIFY_COLUMN || command.type == AlterCommand::ADD_COLUMN) + { + if (command.default_expression) + { + /// If we modify default, but not type + if (!command.data_type) + { + default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), column_name)); + } + else + { + const auto & final_column_name = column_name; + const auto tmp_column_name = final_column_name + "_tmp"; + const auto data_type_ptr = command.data_type; - try - { - String tmp_column_name = "__tmp" + column_name; - auto copy_expression = default_expression->clone(); - auto default_with_cast = makeASTFunction("CAST", copy_expression, std::make_shared(column_type->getName())); - auto query_with_alias = setAlias(default_with_cast, tmp_column_name); - auto syntax_result = SyntaxAnalyzer(context).analyze(query_with_alias, all_columns.getAll()); - ExpressionAnalyzer(query_with_alias, syntax_result, context).getActions(true); - } - catch (Exception & ex) - { - ex.addMessage("default expression and column type are incompatible. 
Cannot alter column " + backQuote(column_name)); - throw; + + default_expr_list->children.emplace_back(setAlias( + addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), + final_column_name)); + + default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); + } + } /// if we change data type for column with default + else if (metadata.columns.has(column_name) && command.data_type) + { + auto column_in_table = metadata.columns.get(column_name); + /// Column doesn't have a default, nothing to check + if (!column_in_table.default_desc.expression) + continue; + + const auto & final_column_name = column_name; + const auto tmp_column_name = final_column_name + "_tmp"; + const auto data_type_ptr = command.data_type; + + + default_expr_list->children.emplace_back(setAlias( + addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), final_column_name)); + + default_expr_list->children.emplace_back(setAlias(column_in_table.default_desc.expression->clone(), tmp_column_name)); + } + } } + + validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context); } bool AlterCommands::isModifyingData() const diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 2b070f93f6d..134e09dad01 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -117,16 +117,6 @@ class AlterCommands : public std::vector { private: bool prepared = false; -private: - - /// Validate that default expression and type are compatible, i.e. default - /// expression result can be casted to column_type - void validateDefaultExpressionForColumn( - const ASTPtr default_expression, - const String & column_name, - const DataTypePtr column_type, - const ColumnsDescription & all_columns, - const Context & context) const; public: /// Validate that commands can be applied to metadata. 
diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index b923e9e255e..fb03d2c375c 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -433,14 +433,21 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N if (child->as() || child->as()) throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); - auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns); - const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); - for (auto & action : actions->getActions()) - if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN) - throw Exception( - "Unsupported default value that requires ARRAY JOIN or JOIN action", - ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + try + { + auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns); + const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); + for (auto & action : actions->getActions()) + if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN) + throw Exception("Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); - return actions->getSampleBlock(); + return actions->getSampleBlock(); + } + catch (Exception & ex) + { + ex.addMessage("default expression and column type are incompatible."); + throw; + } } + } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 254be091b6b..f930b333577 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -114,5 +114,9 @@ private: Container columns; }; +/// Validate default expressions and corresponding types 
compatibility, i.e. +/// default expression result can be casted to column_type. Also checks, that we +/// don't have strange constructions in default expression like SELECT query or +/// arrayJoin function. Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context); } diff --git a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference index 03fd13f9044..53c257874c9 100644 --- a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference +++ b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference @@ -5,7 +5,6 @@ col3 UInt64 MATERIALIZED col1 + 2 col4 UInt64 ALIAS col1 + 3 10 11 12 13 -99 payload String date Date MATERIALIZED today() key UInt64 MATERIALIZED 0 * rand() diff --git a/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql b/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql index 77178478a4d..617dc2435c4 100644 --- a/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql +++ b/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql @@ -11,10 +11,7 @@ select * from defaulted; select col3, col4 from defaulted; drop table defaulted; -create table defaulted (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory; -insert into defaulted (col1) values (0); -select col2 from defaulted; -drop table defaulted; +create table defaulted (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory; --{serverError 116} create table defaulted (payload String, date materialized today(), key materialized 0 * rand()) engine=MergeTree(date, key, 8192); desc table defaulted; From 00c3898f32d01246d5ea275ef1cca0558ec8bdd8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 13:04:05 +0300 Subject: [PATCH 103/215] better comment --- dbms/src/Storages/AlterCommands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 84e50c576c3..dd282a177da 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -542,7 +542,7 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Context & context) const { auto all_columns = metadata.columns; - /// Default expression for all added/modified. + /// Default expression for all added/modified columns ASTPtr default_expr_list = std::make_shared(); for (size_t i = 0; i < size(); ++i) { From 825f86759171c411cf950af09b8b6dd23794fd3c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 13:09:47 +0300 Subject: [PATCH 104/215] fixup --- dbms/tests/performance/if_to_multiif.xml | 6 ++++++ dbms/tests/performance/linear_regression.xml | 11 ++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dbms/tests/performance/if_to_multiif.xml b/dbms/tests/performance/if_to_multiif.xml index 7dee667a1bd..03f528d6349 100644 --- a/dbms/tests/performance/if_to_multiif.xml +++ b/dbms/tests/performance/if_to_multiif.xml @@ -7,6 +7,12 @@ + + + nonexistent_table_if_multiif + + diff --git a/dbms/tests/performance/linear_regression.xml b/dbms/tests/performance/linear_regression.xml index 0b4892f71ec..c358e21af05 100644 --- a/dbms/tests/performance/linear_regression.xml +++ b/dbms/tests/performance/linear_regression.xml @@ -9,6 +9,7 @@ test.hits + hits_100m_single @@ -16,17 +17,17 @@ CREATE TABLE test_model engine = Memory as select stochasticLinearRegressionState(0.0001)(Age, Income, ParamPrice, Robotness, RefererHash) as state from test.hits - WITH (SELECT stochasticLinearRegressionState(0.0001, 0, 15)(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits) AS model SELECT 1 - SELECT stochasticLinearRegression(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits + SELECT 
stochasticLinearRegressionState(0.0001, 0, 15)(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits FORMAT Null + SELECT stochasticLinearRegression(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits FORMAT Null - WITH (SELECT stochasticLinearRegressionState(0.0001, 0, 15, 'Momentum')(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits) AS model SELECT 1 + SELECT stochasticLinearRegressionState(0.0001, 0, 15, 'Momentum')(Age, Income, ParamPrice, Robotness, RefererHash) FROM hits_100m_single FORMAT Null - WITH (SELECT stochasticLinearRegressionState(0.0001, 0, 15, 'Nesterov')(Age, Income, ParamPrice, Robotness, RefererHash) FROM test.hits) AS model SELECT 1 + SELECT stochasticLinearRegressionState(0.0001, 0, 15, 'Nesterov')(Age, Income, ParamPrice, Robotness, RefererHash) FROM hits_100m_single FORMAT Null - with (SELECT state FROM test_model) as model select evalMLMethod(model, Income, ParamPrice, Robotness, RefererHash) from test.hits + WITH (SELECT state FROM test_model) AS model SELECT evalMLMethod(model, Income, ParamPrice, Robotness, RefererHash) FROM hits_100m_single FORMAT Null DROP TABLE IF EXISTS test_model From 66fbc39357ac7433103ea7df6794b6900d04bf0b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 13:17:23 +0300 Subject: [PATCH 105/215] fixup --- .../performance/group_array_moving_sum.xml | 30 +++++++++---------- dbms/tests/performance/if_array_num.xml | 12 ++++---- dbms/tests/performance/if_array_string.xml | 12 ++++---- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/dbms/tests/performance/group_array_moving_sum.xml b/dbms/tests/performance/group_array_moving_sum.xml index d5a0030b623..6939989c5b4 100644 --- a/dbms/tests/performance/group_array_moving_sum.xml +++ b/dbms/tests/performance/group_array_moving_sum.xml @@ -12,26 +12,26 @@ - CREATE TABLE moving_sum_10m(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k CREATE TABLE moving_sum_100m(k UInt64, v UInt64) ENGINE = MergeTree 
ORDER BY k + CREATE TABLE moving_sum_1000m(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k - INSERT INTO moving_sum_10m SELECT number%100, rand() from numbers(10000000) INSERT INTO moving_sum_100m SELECT number%100, rand() from numbers(100000000) + INSERT INTO moving_sum_1000m SELECT number%100, rand() from numbers(1000000000) - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_10m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_100m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m GROUP BY k FORMAT Null - SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_100m 
WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1000m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10)(v) FROM moving_sum_1000m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1000m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(1000)(v) FROM moving_sum_1000m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1000m GROUP BY k FORMAT Null + SELECT k,groupArrayMovingSum(10000)(v) FROM moving_sum_1000m WHERE k in (49, 50, 51) GROUP BY k FORMAT Null + DROP TABLE IF EXISTS moving_sum_1000m DROP TABLE IF EXISTS moving_sum_100m - DROP TABLE IF EXISTS moving_sum_10m diff --git a/dbms/tests/performance/if_array_num.xml b/dbms/tests/performance/if_array_num.xml index 375290e635c..d4c9c29dd99 100644 --- a/dbms/tests/performance/if_array_num.xml +++ b/dbms/tests/performance/if_array_num.xml @@ -8,10 +8,10 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [4, 5]) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([4, 5])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([4, 5])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [400, 500]) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([400, 500])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([400, 500])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : [4, 5]) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([4, 5])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([4, 5])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? 
[1, 2, 3] : [400, 500]) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? [1, 2, 3] : materialize([400, 500])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? materialize([1, 2, 3]) : materialize([400, 500])) diff --git a/dbms/tests/performance/if_array_string.xml b/dbms/tests/performance/if_array_string.xml index 1f14393ee16..235051fc905 100644 --- a/dbms/tests/performance/if_array_string.xml +++ b/dbms/tests/performance/if_array_string.xml @@ -8,10 +8,10 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c']) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c']) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c'])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : materialize(['a', 'b', 'c'])) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['', '']) : emptyArrayString()) - SELECT count() FROM numbers(1000000) WHERE NOT ignore(rand() % 2 ? materialize(['https://github.com/ClickHouse/ClickHouse/pull/1070', 'https://www.google.ru/search?newwindow=1&site=&source=hp&q=zookeeper+wire+protocol+exists&oq=zookeeper+wire+protocol+exists&gs_l=psy-ab.3...330.6300.0.6687.33.28.0.0.0.0.386.4838.0j5j9j5.19.0....0...1.1.64.psy-ab..14.17.4448.0..0j35i39k1j0i131k1j0i22i30k1j0i19k1j33i21k1.r_3uFoNOrSU']) : emptyArrayString()) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c']) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c']) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c'])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? 
materialize(['Hello', 'World']) : materialize(['a', 'b', 'c'])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['', '']) : emptyArrayString()) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['https://github.com/ClickHouse/ClickHouse/pull/1070', 'https://www.google.ru/search?newwindow=1&site=&source=hp&q=zookeeper+wire+protocol+exists&oq=zookeeper+wire+protocol+exists&gs_l=psy-ab.3...330.6300.0.6687.33.28.0.0.0.0.386.4838.0j5j9j5.19.0....0...1.1.64.psy-ab..14.17.4448.0..0j35i39k1j0i131k1j0i22i30k1j0i19k1j33i21k1.r_3uFoNOrSU']) : emptyArrayString()) From e594955618c0f8f6f40ce73c57447bb55046367c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 13:49:16 +0300 Subject: [PATCH 106/215] boop From 42fc5efb52182a23772d5d5785bd51291189082b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 13:50:47 +0300 Subject: [PATCH 107/215] boop From 8acf6834ec304599f12e3fcce19a49f0711701b8 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 3 Mar 2020 14:31:18 +0300 Subject: [PATCH 108/215] fix finite result for infinite query --- dbms/src/Storages/StorageGenerate.cpp | 32 +- .../01087_storage_generate.reference | 202 +++++----- .../0_stateless/01087_storage_generate.sql | 2 +- .../01087_table_function_generate.reference | 346 +++++++++--------- 4 files changed, 286 insertions(+), 296 deletions(-) diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp index e38a14fb3f6..ed9a31a5e03 100644 --- a/dbms/src/Storages/StorageGenerate.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -36,7 +36,7 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -void fillColumnWithRandomData(IColumn & column, DataTypePtr type, UInt64 limit, +void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg32& generator, pcg64_fast& 
generator64) { TypeIndex idx = type->getTypeId(); @@ -407,26 +407,15 @@ public: protected: Chunk generate() override { - - for (auto & ctn : block_header.getColumnsWithTypeAndName()) + auto columns = block_header.cloneEmptyColumns(); + DataTypes types = block_header.getDataTypes(); + auto cur_type = types.cbegin(); + for (auto & col : columns) { - fillColumnWithRandomData(ctn.column->assumeMutableRef(), ctn.type, block_size, max_array_length, max_string_length, r32, r64); + fillColumnWithRandomData(col->assumeMutableRef(), *cur_type, block_size, max_array_length, max_string_length, r32, r64); + ++cur_type; } - - auto column = ColumnUInt64::create(block_size); - ColumnUInt64::Container & vec = column->getData(); - - size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class. - UInt64 * pos = vec.data(); /// This also accelerates the code. - UInt64 * end = &vec[block_size]; - while (pos < end) - *pos++ = curr++; - - next += step; - - progress({column->size(), column->byteSize()}); - - return { Columns {std::move(column)}, block_size }; + return {std::move(columns), block_size}; } private: @@ -436,7 +425,7 @@ private: Block block_header; pcg32 r32; - pcg64 r64; + pcg64_fast r64; }; @@ -492,9 +481,10 @@ Pipes StorageGenerate::read( block_header.insert({std::move(column), name_type.type, name_type.name}); } + pcg32 generate(random_seed); for (UInt64 i = 0; i < num_streams; ++i) { - pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, random_seed + i, block_header)); + pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header)); } return pipes; } diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.reference b/dbms/tests/queries/0_stateless/01087_storage_generate.reference index 1038fb2baec..33e5fdd18ee 100644 --- a/dbms/tests/queries/0_stateless/01087_storage_generate.reference +++ 
b/dbms/tests/queries/0_stateless/01087_storage_generate.reference @@ -1,103 +1,103 @@ -65536 +100 - -[-59] 123481.7989 ('2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') -[-78] 117195.7426 ('2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') -[-25,-72] 27510.0647 ('1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') -[] 103368.5688 ('2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') -[-40] 18089.5192 ('2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') -[] 13555.7292 ('1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') -[-84,95] 71691.4271 ('2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') -[22,38] 101221.1222 ('1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') -[82,65] -210924.4634 ('2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') -[35] -164774.2638 ('1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') -[-110] 31951.0849 ('2053-05-14 03:43:37.023','2555c8e2-d792-ae98-10bd-fe81c5e641cc') -[] 51335.6835 ('1981-10-04 07:39:07.577','8f493375-6f27-a1fd-b842-4acd2a947bd2') -[] -196651.8382 ('2033-06-05 16:33:53.429','88b7da3c-3764-d8e1-899d-309d5ad39592') -[-57] -78651.8841 ('2007-03-01 02:20:37.021','86d48a21-34e6-a5e1-90d4-82d31ac7241c') -[-69,72] 26916.2939 ('1994-07-22 12:08:38.312','fd121990-fe32-068d-8db5-54d21f0a57de') -[119,-78] 28570.1960 ('2057-11-30 11:13:40.087','613f329c-8a56-3c7e-1204-896646a3e2dd') -[] 194390.8215 ('2032-12-16 20:51:40.438','0ddc325a-2efd-2077-c088-2022848fb2d1') -[-58] -134302.9326 ('2061-10-06 12:01:12.483','8562e417-91f0-0001-9b06-ad439901f8b3') -[] 147418.3110 ('1993-04-02 08:18:32.212','3cbe5bb9-b32b-4bbe-8d62-134814725e48') -[13,39] 84693.4541 ('1981-10-12 14:43:24.346','dbfd2279-6cb4-c759-12d9-087f286a9c4d') -[] 100781.8023 ('1974-09-17 01:22:36.779','df51bec4-2bce-6677-dea6-5b6475de0ba5') -[-71] -166417.1079 ('2073-02-07 14:31:17.292','6a41ce1b-e8dd-72cb-fba0-181c08589288') -[] 
19505.0577 ('1986-07-21 04:27:39.201','9b96ad62-efcf-9072-f13b-d52a37f1a3ab') -[81] 37101.8347 ('1977-10-29 19:01:33.177','a0233bd5-395e-83b3-5110-09f07aa65dbe') -[107] 73417.3429 ('2020-01-24 07:07:28.482','1b2c847d-9d2a-9f27-bc34-83cb7f72865b') -[-11,-63] 200159.1233 ('2020-01-30 04:35:18.644','ef21c730-5836-e387-f7f1-40f10d68ca57') -[-59,69] -181229.7275 ('2064-06-03 23:24:18.831','9ddbdfb5-ee6f-de37-dfd5-2eb2abc28aa3') -[-80] 117270.4837 ('2025-07-05 23:52:10.553','14ad622b-6ac9-d754-dbbb-4175b36e7c89') -[] -72892.3984 ('2004-12-18 09:50:49.468','05e59bb3-0e6d-d0ef-9ab7-b9317d2f2766') -[-122,87] 77486.4518 ('2028-11-09 07:52:54.371','7e8dd973-c940-2376-73aa-e49d91515247') -[-76] -46258.3209 ('2004-11-06 16:14:05.356','37535aa6-da8a-4061-0a54-841921e5c738') -[22] -152063.3676 ('2056-01-04 23:10:33.111','aa562a59-a034-09a8-6e5a-645bae68fc48') -[-84,-45] -63890.6858 ('2105-08-23 19:09:37.313','5bd71372-4206-ec00-3437-e4fb80e102fc') -[-40] 198683.2300 ('2103-03-12 03:53:10.401','e5022161-1839-1090-2ab1-b838c6ffab4f') -[] 37877.4483 ('2082-06-07 23:24:18.812','c85042c0-39f7-11f1-818c-1e23b5e00c2e') -[] -139915.2424 ('2024-02-04 00:07:50.053','f9cb5e7b-e089-fdd2-657e-086d358724be') -[] -95386.3084 ('1981-01-11 00:05:50.229','54ec5688-abd1-90e1-858f-a8a438f0d849') -[84,-104] 73372.4312 ('2076-08-06 12:43:39.780','20b4adfb-1be0-fd03-72ae-b99f240a8966') -[-86,-36] -2365.2950 ('2001-09-19 08:39:23.076','573ee213-d5c4-a84a-dfe3-ada8f7da77a2') -[] 37173.5004 ('2078-03-06 07:00:01.036','c0ef8e39-8c05-7d05-57a1-8619d16ed0a5') -[] 46211.8779 ('1976-07-28 03:24:19.781','4f7a49fa-f019-1cd4-2d52-21498c4b045b') -[] 14860.2156 ('1991-04-27 19:46:05.572','84f42bdf-87a2-0d6d-a9f3-b7b4c1b580e2') -[123] -105538.4004 ('1997-11-23 06:16:09.365','fa7cfe86-97d2-fe17-7f15-92bc9e23152a') -[44,60] -104127.4619 ('2079-08-19 02:49:25.286','942117e0-eb15-8f8b-f99a-bca4a0287384') -[5] 24776.2201 ('2075-06-16 22:44:08.959','f521453f-bf21-e08a-6342-24b51fb90c57') -[] 52228.9659 ('2103-08-12 
09:20:19.320','278ec6db-d590-8174-c652-55a3095d6944') -[] 82221.0177 ('2040-04-25 21:25:27.868','f5cd7060-2008-b90d-091b-541c0c78222b') -[] 24698.8893 ('1982-08-27 15:16:17.588','ee758de3-6f8b-8292-4749-7b94ae95e935') -[] -149720.1814 ('2068-04-30 18:40:02.978','980caaa3-0c07-4ba5-6e59-d8d9bf743a00') -[25] 157983.8848 ('2044-07-03 11:35:24.630','0635f169-ad37-2554-cfd5-55bc16422776') -[] 15685.6644 ('1989-02-11 00:41:34.105','5bbcbd57-dcc0-bb3c-6e57-510a128a6ef1') -[] 158034.8118 ('1999-12-16 05:44:17.565','e8cdefb3-d4e5-359f-196c-b936eecbdd5a') -[] 25575.9831 ('2000-11-22 21:53:04.462','0b0679fa-c5a7-1e42-9314-201bde1d4887') -[] -131521.7038 ('2068-07-22 12:38:12.718','90503482-35b5-ceb1-8b8c-1f39ed0c0794') -[-5,-127] -129970.8743 ('2019-01-12 22:34:10.162','7638ba6e-5cb2-226b-e2d8-1ad44db1a63b') -[93] 175174.8730 ('2067-02-19 09:03:20.655','279568d0-0e13-38b7-d730-da3366ebaf72') -[] 42444.6468 ('2092-09-30 17:53:17.987','49576daf-0b31-fb3c-6199-628ed5d84004') -[106] 110335.2649 ('2069-01-25 06:39:42.982','d8e68488-f870-fa61-b52c-c0fa8be53385') -[-128] -87556.7925 ('2087-08-05 12:06:19.275','d562d3f5-1a2c-4453-f62a-8a17e841aa1c') -[] 185735.8374 ('2011-11-15 14:26:49.202','0dd6dcb7-3157-b1db-3703-17cd7af3fca3') -[68,-42] 170844.0356 ('2054-08-20 04:32:45.269','9bbe3850-5d18-26b7-b063-3bdba428e87b') -[-41,114] 109974.6845 ('2025-09-13 00:57:59.049','5a496c7e-4354-a8b2-bfd7-e3dd39ba7c75') -[-71,122] 136722.5111 ('2080-04-23 02:18:55.233','8e6a19d8-5088-89d8-4d47-98dced3729f7') -[] -158072.5063 ('2009-07-20 22:51:59.310','0b4699b4-3152-6de8-bc97-856990eba662') -[4,73] 119730.4313 ('2069-03-26 07:01:04.910','6fa07110-c24b-2205-ca3e-e95cbc6ab99a') -[-53] -1448.0319 ('2048-05-08 01:42:42.513','aed73313-3fe7-20ab-74fc-1974deb5feea') -[38,36] -81638.8895 ('1977-11-05 20:56:01.160','3cfa66a6-e302-5857-f14e-d1c95f79304e') -[29] -9186.3306 ('2050-10-28 21:34:49.604','00446ede-1f63-f29b-861b-907ef784c10d') -[] 51541.3812 ('2007-04-05 
14:53:34.957','bc06c623-b8b4-c509-822d-c4ffabfb3bb1') -[] -74687.7838 ('2022-03-01 23:37:40.261','39d28440-5447-2e11-58d8-3e996d180710') -[23,-71] -128826.5243 ('2043-12-19 01:47:45.951','bc0d838d-111b-1094-87fa-390ae50f798b') -[-7,65] 170699.4470 ('2036-08-08 20:44:43.440','083e90ba-4432-8f42-a768-28c2a5193705') -[-31] 25454.4229 ('2009-01-09 14:19:10.304','1d853ca8-8103-e9e0-0156-6a0c2cf1bebf') -[-10,52] 34800.8750 ('2029-12-27 00:48:20.183','c9b0b807-0a93-9051-eadd-e6c0f8f1870a') -[114,-27] 55543.7780 ('2055-02-27 10:34:17.324','03f59f39-7653-5537-8075-8af52335f89e') -[] -93103.4677 ('1998-12-07 09:19:18.592','5a7dc701-41c4-1d1c-d606-eec028da641a') -[] 154376.9076 ('2095-09-09 15:30:44.873','7825f0a7-3941-ded3-15ff-30614e66b184') -[38,101] 100087.4363 ('2018-08-10 11:21:25.476','f7188a9c-6256-0e15-b523-05f7381aa04f') -[46] -206787.1260 ('2083-05-07 15:59:28.021','f234dc7e-b529-2fa2-395a-4500cffb908b') -[-44] -88120.2495 ('2016-01-21 05:26:08.114','54bacc39-3b31-1a61-6001-d68df0b86006') -[-53] 142694.1781 ('2101-02-19 06:19:46.823','c1768c3a-2b5d-98ad-4389-21bb557ffdef') -[-12] 20736.1459 ('1970-07-26 11:10:41.972','aa68eaa8-26ed-bdfc-9ebf-a3e838688f16') -[123] 126601.4572 ('2081-05-25 01:26:41.416','af4c25a2-5dde-fc3d-07ee-56a89deca2ca') -[-28,-63] 67277.0765 ('1996-09-26 12:48:08.251','69fb36c6-bffa-3499-1da9-d1488fe076cc') -[85,-77] 85443.0365 ('2018-10-16 22:39:35.338','b9bed16e-b0b3-1718-efab-83350513300c') -[] 88025.4969 ('2014-11-03 20:06:23.407','f66ceccc-b716-0ea6-cc08-0bcabbc32f82') -[] 41636.0286 ('2094-10-08 03:09:52.518','a97f04ea-d317-c160-979d-75077f536c92') -[108] -83533.9131 ('2080-09-15 20:01:54.317','15f62089-2425-dea7-359f-5d0c5fc76a3c') -[] 80004.7959 ('2003-05-06 13:18:36.664','c62de05e-ff2e-776d-358b-1afa4a914d8a') -[-51,-99] -96702.7448 ('2103-01-05 12:14:12.415','927c495e-b2b2-0db2-c514-0cc92ad57275') -[-7,94] 141107.4320 ('2086-04-08 19:12:41.784','f2f7aa7b-625e-e5a4-df9c-a3299732b43b') -[-123] -7862.4477 ('2037-11-17 
09:32:32.517','b4d10717-04b8-89b5-ecb0-1886ad3eab71') -[] 117563.9868 ('2009-07-13 18:40:42.823','e5fbd00e-f8e5-fc29-fb38-3cfccae29cca') -[] -207597.6169 ('2054-05-27 09:25:02.389','c94806f0-079b-1b4f-08cb-5665cfd7f0ba') -[87] 37163.8588 ('2061-02-02 04:33:21.217','6a47246b-0b6d-2621-6ff4-e141c473558b') -[8,16] 39929.4977 ('2084-02-16 03:32:41.835','9fefe1e3-5cfe-9b6e-79f8-1bee729f57d9') -[] 57278.1978 ('2060-07-18 15:52:12.277','1b8aa41f-d775-655a-5ef0-0e2516ab73e0') -[35,60] -119194.1294 ('2027-11-17 12:54:40.246','8e0e1ff5-c982-27df-241b-ccb1f88b0965') -[] -24783.7666 ('2072-11-04 04:24:25.367','bcbe9529-7213-9aa2-9648-74a913f0a940') -[] -194381.9572 ('2094-02-21 01:42:11.385','2a51b55d-6f25-ad4b-398d-f27fb0241859') +[] -183162.1041 ('2074-01-15 19:36:42.413','0345f8ad-8936-8cc9-9ff2-394f225fc318') +[3] -39049.0845 ('2045-07-04 15:01:09.380','f79d47d8-4030-9916-54b9-495a5ccc7202') +[-95,-104] -14324.1685 ('2023-10-02 06:05:53.887','ea94157b-737b-a272-acd5-c7ab9c6f07c3') +[] -182420.0194 ('2002-01-08 01:42:50.396','ac579c0f-8523-144f-aa4c-c9587cc27144') +[] 127157.2100 ('2006-11-23 14:25:39.542','d07c5204-ef95-6804-83df-01dedaf32522') +[28] -198600.4267 ('2060-09-25 02:57:41.504','b13ff007-c245-d737-85b2-1fa003e57127') +[] -87232.0739 ('2027-05-12 20:26:59.405','a2f2cbf4-b11b-6976-7b91-14b6964acbe2') +[] -110349.8987 ('2042-11-01 10:51:30.039','445a77b5-0a27-3485-8dd8-c7cc35d2692f') +[32,73] 123253.5669 ('2016-06-21 00:23:02.917','c12095e6-b82c-d81c-4629-acd80e02b080') +[-109,85] 34320.8302 ('2080-12-31 16:49:32.509','ebbbe70a-0321-ff18-89de-2bc9a9e4c454') +[68,76] 160458.5593 ('2030-05-23 03:33:29.681','805b0a62-9ada-a47e-2d5e-63cb5923549c') +[69] -189322.2887 ('2084-06-19 03:31:58.508','429df3a4-ff18-28d5-9ad8-dcdd78e8b1ae') +[] 189158.4731 ('1985-12-21 16:36:51.092','d63c5cbb-9418-ce59-000c-056f88157bfa') +[-120] 114890.5905 ('2089-02-19 22:30:18.216','2bc4860a-7214-300a-851e-b61011c346ef') +[] 54493.1631 ('2062-08-18 
04:57:01.348','c00d218a-913f-b657-1ff9-99927741f7ab') +[103] 88891.6006 ('2071-05-23 21:46:45.644','036d2746-f7aa-b5a4-b716-b8e8a5e041da') +[28] -41173.9863 ('2044-03-11 10:06:47.659','6bf54ef0-9bad-54d4-5ca3-02d79883b697') +[] -64809.0652 ('2010-11-02 23:46:46.150','ae5cafb4-fe3c-71a5-9a76-0314c44180de') +[125,74] 28139.7661 ('1989-07-19 22:10:13.477','ee9b8173-4426-8615-97eb-a904266847e5') +[-101] -29032.2527 ('2052-08-13 08:55:15.045','ab8ee358-ff53-de7e-f012-cd0eed754ff2') +[82] 170334.6151 ('2034-02-23 18:50:46.847','f1402791-1d23-e56d-25f0-5a51a3cb245a') +[27,56] 168648.5067 ('2004-05-31 19:47:19.448','5019365e-f74d-b31e-aacb-63d8189e3e3e') +[-74] 89577.4738 ('1997-01-15 09:05:57.691','d144325c-24a9-411e-cc10-78b0637f75a7') +[100] 178532.5772 ('2055-04-22 16:20:11.498','41439c27-fba1-1b66-13a7-cf79fded4d9a') +[32] 123101.7871 ('2103-02-18 15:53:42.748','25b804d3-a73e-ed14-e6e1-eafb0d9473cd') +[-115,-85] -208371.1662 ('2039-10-22 18:06:50.235','41fba85a-5080-48bb-e18d-a8af04a890aa') +[-12,101] -7791.5577 ('2073-07-27 11:35:45.239','c00be55f-59ac-762c-af0a-9e33cf30a1f4') +[-127] -18602.1918 ('2024-02-05 19:54:00.798','ea85cbed-66f2-197b-4e63-dfbdcd306cce') +[-78,125] -112158.3556 ('2016-08-12 06:46:17.173','15809e25-b003-010b-c63c-9e880568736a') +[] 151191.1081 ('1982-11-12 17:59:10.171','d6bbbe2c-fca0-53c8-22a6-de9e0715d3cc') +[46] 58689.9611 ('2085-04-13 09:13:07.230','fe4be382-eb78-4cf9-fa57-c6eccf955419') +[-4,42] -88292.1046 ('1980-03-06 08:29:12.503','2633522e-ff9c-b837-1b9b-6559875c13b0') +[-2] 64983.6649 ('2034-07-07 11:20:23.903','d19f5c4d-e444-2e5b-a55d-5280d1760b94') +[-31,-110] -25638.6649 ('2025-05-17 21:45:25.519','3654a15e-bfa3-6075-b5b8-07e25310de1f') +[25] -140469.2476 ('2083-12-13 23:55:25.450','940f7441-ae40-d810-f6c3-e2fff468050c') +[-99] 128186.7318 ('1995-09-15 04:26:33.803','4cfd264f-ff00-4190-929c-b675826607d3') +[] -52961.0340 ('2046-03-19 14:15:50.245','314de821-308b-c61c-e256-9f6afed5d4f3') +[] -155852.9334 ('2052-01-29 
06:31:08.957','5be2ccd2-b5bb-921d-5b5e-4a0e22385de7') +[-74,81] 29366.0091 ('1978-03-05 19:24:49.193','ab9b6a39-89ac-9280-c76f-60d598ce65c6') +[9] 56134.8951 ('2104-04-03 10:27:33.053','339a7f6d-0e0b-e039-78c0-2d045457d821') +[-61] 68841.1188 ('2059-07-26 12:14:33.197','c817bbb3-d091-b73c-1b9a-53f8a03bffb6') +[89] 168034.0459 ('2028-05-29 08:02:02.393','bc217a73-e802-1772-80b5-d8c827295799') +[124] 11648.6762 ('2084-12-25 12:10:35.676','77390177-1dd6-a5c0-dd35-4f85e38bcb2c') +[-47,-125] -120893.6705 ('2012-10-18 22:52:57.524','472814b2-4033-c5a5-7d86-fb36079e88fb') +[35] 153250.6252 ('2006-11-06 00:05:25.456','de0d6ed9-eca6-e01e-eb1c-c46c8ad6e33e') +[-43,70] -141086.3184 ('2013-02-03 23:07:11.759','65d48b24-cdc0-f7db-cb16-d0ad03279bcc') +[120,-57] -93351.1404 ('2000-02-03 14:39:00.466','6991722b-90dc-e9dd-c5e7-f28bd1d4f0d8') +[34,43] 187780.4567 ('2014-02-10 05:22:19.250','3db77bc5-d877-b22e-6667-955bf36d2e08') +[73] -90148.5697 ('2014-10-05 18:34:31.419','5a0f919e-38c9-0a68-e805-977db04d0acb') +[] -179121.0029 ('2077-01-23 07:57:55.365','fcf79336-a6dc-44fd-8c78-7e74e07b60fa') +[-69,120] 119321.8003 ('1989-07-01 13:11:35.185','92f6a362-250c-cfcd-acd7-99399cbf88ad') +[] 208864.8324 ('1991-02-17 03:04:00.682','b0dc8e88-ea6f-c2da-c116-3e4873dc8d54') +[22,-14] -127735.4391 ('2036-08-10 08:33:03.806','5ab1ab2b-913d-ff8a-6f8f-86387e77ed5c') +[83,-70] -142476.9847 ('2074-11-22 19:27:13.085','51b9d30a-3b10-265c-4086-1ac35b634ec7') +[] -128052.2443 ('2088-01-02 10:58:36.999','745e8226-d906-7fb3-33f4-9a079037bdcd') +[12,-116] -88390.1399 ('2074-02-18 17:46:45.208','fb5f827e-1809-6cab-2855-d45df20ecd92') +[] -84110.2097 ('2039-03-24 17:08:15.660','88e18c93-6276-d176-dad1-7db72e340ca7') +[] 202866.8175 ('2104-01-25 13:42:41.758','10faa33e-d383-c6b3-399d-44c06ebb00f5') +[-21] 151775.1601 ('1995-10-20 15:44:53.296','7ccaf135-787d-2ac0-09c0-7545c798ee14') +[-19] -15498.5738 ('2097-08-02 18:34:16.406','cf97f268-02c0-24fc-bbf3-c7b272632c14') +[116] -72670.9713 ('2020-08-31 
18:10:41.904','f9cdd931-e2ed-0584-d4b9-67a6df717a4c') +[] 124014.7040 ('1975-07-23 11:17:25.176','ccf33ba5-8fd8-c8b5-ccc4-a9cb892d4b55') +[-56] -204745.8115 ('2037-11-13 01:03:12.923','6dc83c7b-7782-57b4-a293-18ca8aba331d') +[] -28535.2534 ('2105-04-07 20:51:09.990','0d9f3a2f-d4f2-a330-7b6e-001ea3aacbde') +[-124,-128] -31519.7583 ('1993-02-14 23:06:10.338','a073dafb-6f1f-273e-acf9-88200f82af6d') +[46] -154950.9257 ('2032-06-04 23:16:16.051','e6aa3b80-9f53-6c10-0cc8-622622f964b4') +[] 206914.3454 ('2003-10-05 10:44:30.786','137ed3be-2d40-d1c1-7aff-b32f7e21c0da') +[-47] 91521.1349 ('2006-09-01 04:06:32.496','52e4ef43-9379-4864-8f63-8e205875a096') +[121] 161456.7813 ('2027-11-03 10:20:30.670','f1abbd17-f399-657c-1a47-1dd627578b53') +[99] -127959.4741 ('2084-08-18 06:04:41.942','2a3b92c3-75ed-bd20-5a77-b77cbe1ce479') +[-97] 82020.4570 ('2061-10-25 06:16:50.814','8625d479-6e81-318f-5077-a9deb13c50e0') +[71] -121599.1388 ('2010-04-02 11:05:18.877','0ec279cf-c9b2-dc65-40c0-2d0f390b1102') +[] 98975.6469 ('2049-03-06 08:56:25.010','845340d7-a1df-9ddf-b737-9eb90ca6344c') +[92,81] 135864.7854 ('2040-12-30 21:17:28.184','ea224755-198e-c9ae-c59b-0517a7459d7c') +[81] -154620.5037 ('1984-06-07 02:36:28.734','52d3b727-043f-1d43-6f48-51e8abdc2127') +[38] 33379.3375 ('2057-10-19 17:03:44.317','e709bfc2-0915-9e4e-4d01-c10b24795e30') +[] 7491.1071 ('1971-04-29 09:30:25.245','26bcd2ab-6d0b-fc20-27eb-084c4248af7d') +[-122] -135635.3813 ('2010-03-04 23:05:25.982','66ed96eb-fc6e-653e-0353-ac4477ea60a6') +[] -174748.4115 ('2020-10-28 07:39:33.461','e17fa9ba-2595-c0f9-2f85-d6bbdc2f6f6a') +[72,106] 25749.2190 ('2008-06-15 04:03:39.682','0e47b616-da80-091e-664d-2a35bc57a480') +[-84,97] 109277.9244 ('1998-10-27 10:40:00.442','9488bce4-46d7-8249-78aa-540b8be43937') +[-120,-107] -64113.5210 ('2091-12-03 06:46:11.903','325fcb1c-8552-b434-b349-732d62be19f1') +[] -66141.6000 ('2085-10-05 08:08:11.830','4c66022b-75b9-b0a8-3897-b9de8ea851f1') +[-34,-102] -142314.4437 ('2038-01-27 
12:04:29.739','91e9eb11-5679-02ef-6ea6-2c9fdcb12ed9') +[103] 96187.7213 ('1978-10-07 13:57:43.616','7c02e8e3-9e98-5043-8029-34e32ad1af61') +[] -21344.8423 ('2085-01-13 00:10:52.538','52cb36f8-987a-f414-7e0f-93ddccc5c377') +[16] -95098.4107 ('2074-02-19 18:56:00.878','821e4b10-f70a-4bee-ef0c-ac12eab994f3') +[21,86] 27954.7748 ('2033-10-18 03:15:38.815','bfe4d932-c5ed-45c0-9f50-72a6394d49af') +[] 149788.2085 ('2073-09-10 20:42:48.693','5e7d825e-5c88-7c89-4235-0e7934739a12') +[33,116] -148302.8732 ('2044-08-10 22:05:18.943','a53d4b07-5529-7472-3cca-3770f52b3648') +[] -98384.4505 ('2070-01-28 05:17:35.804','4833b839-51a3-87b8-7709-30676f697aa4') +[] -75597.1523 ('2075-02-04 19:24:01.477','d64becff-5c08-b0a0-e7f1-b86eaf5f1913') +[] 179005.6113 ('2100-05-27 21:54:12.965','d87ce81c-c471-b6b3-93b7-05225cb577be') +[] -134366.9213 ('2054-11-16 18:19:00.801','c348fced-6700-f0f6-cda0-14aef7ea6948') +[10] 82182.0343 ('2017-03-04 09:41:21.249','e19f0022-49ab-2d41-872d-be35669a79bc') +[-28] 90333.8564 ('2032-11-19 01:23:37.107','e2586be2-e968-21d0-d1b1-b438c55a59a3') +[-73] 185647.6735 ('2001-01-23 16:20:26.442','24b04f39-f272-24ff-538d-41e636a1a37a') +[-79,7] -87628.8007 ('2005-03-25 04:17:49.969','38a10e9d-7086-f358-8e50-c72b278bec42') +[119,-55] -208591.8591 ('1976-11-14 15:17:57.569','d0935dc7-7f56-71db-67f2-1b4e52770ba9') +[-108,-124] 181408.0349 ('2056-10-27 05:07:32.393','29d655c1-c35a-1245-25e2-65b4f233cb9c') - diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.sql b/dbms/tests/queries/0_stateless/01087_storage_generate.sql index 02a0dc732f4..46d49dc165f 100644 --- a/dbms/tests/queries/0_stateless/01087_storage_generate.sql +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test_table; CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Generate(); -SELECT COUNT(*) FROM test_table LIMIT 100; +SELECT COUNT(*) FROM (SELECT * FROM test_table LIMIT 100); DROP TABLE IF 
EXISTS test_table; diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference index 6e356185110..08081a34bc9 100644 --- a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference @@ -1,238 +1,238 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -972365100324636832 972365100324636832 1412771199 1412771199 11647 11647 127 127 -3152476261539479119 3152476261539479119 1791099446 1791099446 566 566 54 54 -4963323010661987954 4963323010661987954 124312908 124312908 56652 -8884 76 76 -5960170508884574548 5960170508884574548 1968572995 1968572995 2627 2627 67 67 -1945837313970117551 1945837313970117551 1080415314 1080415314 54354 -11182 82 82 -5416802321970971533 5416802321970971533 2578637408 -1716329888 57952 -7584 96 96 -14123053850434978662 -4323690223274572954 2103691749 2103691749 51685 -13851 229 -27 -5897270072098692621 5897270072098692621 1218125110 1218125110 7478 7478 54 54 -14395125118590964207 -4051618955118587409 776621019 776621019 19419 19419 219 -37 -6099213765121594905 6099213765121594905 4155847760 -139119536 13392 13392 80 80 +5443401583997919274 956654340036924402 2956613447 2041372187 46025 26509 247 -34 +14051730854243326159 340055300607421421 579798001 915264595 58925 22498 36 -57 +12126660396637528292 -9182366379883086416 535113873 -1583603936 45790 6066 230 91 +5198178071978083704 -3549936112074464250 3354362520 -1732019372 41330 -27737 13 -47 +9045663333607591872 -5069075924065328373 741246230 -1830932765 29642 -11720 41 7 +18192666371709191624 -5005976579831091773 671021725 1851158245 38613 -27838 57 3 +4333039311970693040 -7294587049092886539 2106347821 2101852759 24058 9107 85 94 +1398111012802844853 1131449717368086026 1687614855 -1193084417 9803 -18141 198 115 +15838944643191192696 6226099517671026657 1300309956 468322781 17216 -2375 184 -102 
+15170414162889419078 3337938833953948518 3603117877 -1297530274 25534 8264 36 16 - Enum8(\'hello\' = 1, \'world\' = 5) world -hello -hello +world world hello hello world -hello +world world hello +world - Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) -['world','hello','hello','world','hello','hello','world','hello','world'] -['hello','hello','hello','hello','hello','world'] -['world','world','world','world','world','hello','hello','world'] -['hello','hello','world','hello','hello'] -['hello','world','world','world'] -['world','world','world','hello','hello','hello','world','hello'] -['world','world','hello','hello','hello','world','hello','hello','hello'] +['world','world','hello','hello','world','world','world'] +['world'] +['world','hello','world'] [] -['hello','hello','hello','hello','world','hello','world','world','hello'] [] +['world','hello','hello','hello','world'] +['hello'] +['world','hello','hello','world','hello'] +['hello','world','hello','hello','world','world'] +['world','hello','world','hello','hello','world','world'] - Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) +w h +h +o +w w o w -o h -h -w -o o - Date DateTime DateTime(\'Europe/Moscow\') -2001-11-21 2014-10-08 16:26:39 2014-10-08 16:26:39 -1971-07-21 2026-10-04 10:37:26 2026-10-04 10:37:26 -2106-02-07 1973-12-09 22:21:48 1973-12-09 22:21:48 -1977-03-12 2032-05-19 12:49:55 2032-05-19 12:49:55 -2106-02-07 2004-03-27 22:21:54 2004-03-27 22:21:54 -2106-02-07 2051-09-18 11:10:08 2051-09-18 11:10:08 -2106-02-07 2036-08-30 09:49:09 2036-08-30 09:49:09 -1990-06-23 2008-08-07 20:05:10 2008-08-07 20:05:10 -2023-03-03 1994-08-11 20:03:39 1994-08-11 20:03:39 -2006-09-01 2101-09-11 05:09:20 2101-09-11 05:09:20 +2031-03-05 2034-09-09 02:49:47 2061-06-26 03:46:01 +1972-10-06 1999-01-02 11:09:55 2064-03-18 05:47:09 +2004-01-16 2055-12-02 15:29:20 2090-08-18 23:04:46 +2061-07-14 2051-03-20 20:58:44 1973-04-20 21:20:34 +2063-04-13 2048-01-31 01:02:11 2051-02-07 03:11:54 +2106-02-07 2028-08-29 
13:37:25 2054-10-20 03:48:21 +2026-11-24 2036-08-09 02:59:19 2065-10-12 06:39:38 +2106-02-07 2068-04-17 13:07:59 2101-04-03 08:48:59 +1997-11-15 1984-11-03 12:39:41 1998-04-01 17:38:08 +2008-09-11 2064-12-25 16:23:42 2031-10-18 03:20:14 - DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') -2026-10-04 10:37:26.199 2026-10-04 10:37:26.771199 2026-10-04 10:37:26.771199 -2032-05-19 12:49:55.908 2032-05-19 12:49:55.312908 2032-05-19 12:49:55.312908 -2051-09-18 11:10:08.314 2051-09-18 11:10:08.415314 2051-09-18 11:10:08.415314 -2008-08-07 20:05:10.749 2008-08-07 20:05:10.691749 2008-08-07 20:05:10.691749 -2101-09-11 05:09:20.019 2101-09-11 05:09:20.621019 2101-09-11 05:09:20.621019 -2062-07-10 05:18:58.924 2062-07-10 05:18:58.002924 2062-07-10 05:18:58.002924 -2074-03-12 07:52:50.424 2074-03-12 07:52:50.576424 2074-03-12 07:52:50.576424 -2075-04-13 03:14:55.643 2075-04-13 03:14:55.060643 2075-04-13 03:14:55.060643 -1990-06-27 22:41:59.565 1990-06-27 22:41:59.574565 1990-06-27 22:41:59.574565 -2071-06-01 23:38:13.679 2071-06-01 23:38:13.873679 2071-06-01 23:38:13.873679 +1988-05-16 19:00:01.447 2064-03-18 05:47:09.972361 2104-06-20 09:26:44.845879 +2076-04-17 18:22:00.873 1973-04-20 21:20:34.769886 2052-08-01 07:14:05.921510 +1991-04-07 13:55:25.230 2054-10-20 03:48:21.341514 2013-02-07 18:37:45.437737 +2023-06-24 16:54:15.821 2101-04-03 08:48:59.544378 2039-07-05 08:51:02.770005 +2084-03-05 21:04:37.956 2031-10-18 03:20:14.437888 2076-03-16 14:08:20.993528 +1999-01-02 11:09:55.187 2054-01-01 16:49:22.580109 1997-01-09 20:11:35.889758 +2051-03-20 20:58:44.360 1975-02-11 06:38:15.042546 2015-10-21 23:47:13.191963 +2028-08-29 13:37:25.531 1975-02-14 07:25:38.319928 2103-09-16 20:57:23.033927 +2068-04-17 13:07:59.759 2024-03-06 21:42:43.711891 2045-04-22 19:38:11.140126 +2064-12-25 16:23:42.781 2025-08-18 15:44:56.149625 2093-09-26 16:30:56.744858 - Float32 Float64 -1.1193126e38 9.476003236390048e306 -1.4190551e38 3.072187108218657e307 -9.849083e36 
4.836914127890829e307 -1.5596642e38 5.808373317056589e307 -8.559931e37 1.8962795639737924e307 -2.043007e38 5.278843956528775e307 -1.6667162e38 1.3763359457240068e308 -9.650981e37 5.747074866263327e307 -6.1530254e37 1.4028501451406057e308 -3.2926016e38 5.943875336376322e307 +2.3424705e38 5.304765772621186e307 +4.5936326e37 1.3693852957827914e308 +4.2396088e37 1.1817811347484115e308 +2.6575997e38 5.065787759860024e307 +5.8727575e37 8.815282962741328e307 +5.3163816e37 1.7729324649694315e308 +1.6688205e38 4.2226828718895e307 +1.3370661e38 1.3625030842560206e307 +1.0302116e38 1.5435548915708008e308 +2.8546838e38 1.4784044970034722e308 - Decimal32(4) Decimal64(8) Decimal64(8) -141277.1199 34587215859.56486735 1793697015189548808800.3321186298199631 -179109.9446 91310388980.88247636 9155714933283527596575.2265933417808212 -12431.2908 54718336176.75535245 3589436293988117804903.9923828992983949 -196857.2995 66269939664.00937485 -7975800700274615666991.6205621121901043 -108041.5314 -29591948098.91367399 -7473917794926308807002.4588259571608039 --171632.9888 -3616165358.65719375 16693382269433918843239.6450094159824305 -210369.1749 80507399843.63587111 4905995755279397551053.6888361872164391 -121812.5110 -34656882097.63586027 12093397887518858443128.8806284162773013 -77662.1019 -5854815277.05485807 14924696978427516066647.5550554696171025 --13911.9536 -18527474.08621639 2233902906033696342890.3204716019302329 +-133835.3849 87676267830.44260947 10041303591043480341650.6377217747572943 +57979.8001 -68015271123.73929132 -11658496611537681782723.8256877955807880 +53511.3873 -78637963449.98695195 16686303649199763212696.4854950355256776 +-94060.4776 90273888640.14252543 7993046724924589483272.0796323974797493 +74124.6230 20114310313.64207198 -4810540869033768101015.4448286464595642 +67102.1725 -60472921957.85611731 1764715777766465744700.9237855716355053 +210634.7821 -20967919098.37725326 -16938476260073815366594.8118263905360890 +168761.4855 -74544559691.08355371 
-9350794626143586522954.2962771754340925 +130030.9956 -54650148153.48939189 -13456138041801265081736.4812607484010998 +-69184.9419 38286965773.25360062 11485126437992390872631.7990315807376230 - Tuple(Int32, Int64) -(1412771199,972365100324636832) -(1791099446,3152476261539479119) -(124312908,4963323010661987954) -(1968572995,5960170508884574548) -(1080415314,1945837313970117551) -(-1716329888,5416802321970971533) -(2103691749,-4323690223274572954) -(1218125110,5897270072098692621) -(776621019,-4051618955118587409) -(-139119536,6099213765121594905) +(-1338353849,5443401583997919274) +(579798001,-4395013219466225457) +(535113873,-6320083677072023324) +(-940604776,5198178071978083704) +(741246230,9045663333607591872) +(671021725,-254077702000359992) +(2106347821,4333039311970693040) +(1687614855,1398111012802844853) +(1300309956,-2607799430518358920) +(-691849419,-3276329910820132538) - Array(Int8) -[127,54,76,67,82,96,-27,54,-37] -[80,-20,18,104,-94,99] -[-1,101,-9,79,-75,-106,-124,25] -[-126,76,-7,-28,20] -[50,109,25,61] -[119,75,-17,80,-26,-86,-1,78] -[87,87,-98,-52,-10,-55,90,-116,-110] +[27,83,32,84,-29,-27,87] +[-1] +[-35,94,-55] [] -[-8,30,126,-82,-83,-112,109,-37,46] [] +[45,-34,114,-54,-43] +[-6] +[75,64,-66,-115,-30] +[-78,-89,56,66,-109,35] +[-71,72,-9,36,-26,13,41] - Array(Nullable(Int32)) -[1412771199,1791099446,124312908,1968572995,1080415314,-1716329888,2103691749,1218125110,776621019] -[-139119536,458002924,-1375243758,-1499390872,-1006911326,282060643] -[-972627201,-1550392731,646512119,-1176093617,-1094550603,1998158230,-1491853180,-1004684263] -[-1087003262,1246511948,-287482375,1245659620,-447199980] -[75852338,-10461331,924798233,-707845059] -[-694178953,-1402336437,497728751,1937926992,-129643034,982571178,2100766207,-1068740018] -[473070167,-1859986345,611197598,-1197393460,640993526,1545236681,654453082,1474541196,293114258] +[2041372187,915264595,-1583603936,-1732019372,-1830932765,1851158245,2101852759] +[-1193084417] 
+[468322781,-1297530274,-1407994935] [] -[-894672648,-272503522,-1437033090,-1467734354,-1327634003,176306576,1752494957,-228452133,1262726958] [] +[-1321933267,-488197410,104178034,-1735625782,-1618897195] +[-1272422918] +[-153016757,891437888,1950049214,6580109,-1644079134] +[790042546,161321895,1074319928,161583938,515711891,1709750563] +[-149817671,1755521096,815845879,-51580892,1361921510,-1688868851,-1185529559] - Tuple(Int32, Array(Int64)) -(1412771199,[972365100324636832,3152476261539479119,4963323010661987954,5960170508884574548,1945837313970117551,5416802321970971533,-4323690223274572954,5897270072098692621,-4051618955118587409]) -(1791099446,[6099213765121594905,9049500661325627565,6490610108199929265,2659545628039292992,1049436691901089319,6555844131189778155]) -(124312908,[-3618886197108527083,8090694443849478311,-6748112818340381167,1211001191921707623,1583062447640133561,3619956371411508974,6210395553935478118,7999260120568553136]) -(1968572995,[-8213617876379062675,2929679139336823999,-6898532963881595768,2859043124745757119,181602243649873688]) -(1080415314,[7459507076057491211,2235978668844106654,7747732820502187351,-7347147272488840513]) -(-1716329888,[-3574726218986141421,3981338983611724340,8771848629812837701,-6576737000659768488,6769293131084427761,-8327622552204843394,1747121705533891044,4459867665041030928]) -(2103691749,[7974359543614958207,-973852668603249740,8242686802579364489,-1858485842996203811,-7644694849238348395,121039232775176155,2267202361603555760,-7133497827826495528,-4966062503744808160]) -(1218125110,[]) -(776621019,[-4132303197660648388,-402394423460281791,3022356521898683902,48062283965211565,36155406469293327,-2534696891053843107,-6385296537632895626,5203636353956926927,5117509146781816226]) -(-139119536,[]) +(-1338353849,[5443401583997919274,-4395013219466225457,-6320083677072023324,5198178071978083704,9045663333607591872,-254077702000359992,4333039311970693040]) 
+(579798001,[1398111012802844853,-2607799430518358920,-3276329910820132538,956654340036924402,340055300607421421]) +(535113873,[]) +(-940604776,[-9182366379883086416,-3549936112074464250,-5069075924065328373,-5005976579831091773]) +(741246230,[-7294587049092886539]) +(671021725,[1131449717368086026,6226099517671026657,3337938833953948518,-104956130729581604,515805789944032293]) +(2106347821,[2731028582309582302,-8197314279937271385,7439592879615992239,-8726726222408049230,-4046170041070917399,-8162695179087422573,7147712321550951494,-2473105312361834401,2871941532606538254]) +(1687614855,[7045950974355040215,8128475529675984757,3862453874713979777,8584893221699499395,-4344095019439049735,7221768832555831190,5712009283210486481,8657278465574644253,-4620821897447975309]) +(1300309956,[-3580736586972265629]) +(-691849419,[7980379733974797651,-548434416689229144]) - Nullable(String) --CRz!2%=1 -)X$U*B -0?,$n_&! -e;P,& -{<*8 -CT"o98o+ -8)7+64&X\' +;\\Sm\'sH +T +@^1 -pX*997@,L +7-f)$ +9 +)&}y3 +w5>+\'@ ++g+N^g$ - Array(String) -['UL*`+L>~ ',')}e&,t','2a%#x0]/','f\'J&Q','3 +(','|p)`&"/-','&5#-WZq> ','','Gf,d^m3/y'] -['','+%%F','-+WPe($q','$,"N','','e!B'] -['({ ;\'','=$<8j',',0EEm!=Y(','+ "i,#-IK',')co','','-(9(t!','-wz'] -['*?kY','tj~P/k\'H','.','','-n.`#2'] -['%T\'3Y"xM','0+@Tf',',0a','k G"QB$'] -['MH3','d`{K+A9ff',',','Y7','m5','3"Hx>q+(','WI+>P>6','8ip-/@lf'] -['.+msJ,j','Z','#;;Dv-1p','p&v&&m','x5R1!=','\\',',v','@2)1gx',',N3jX+@\\'] +['Y9n(%ub','\\\'f%7','','X0O@','D','4^,~q','a(gmt6#{X'] +['@#q4?Q%\'.'] +['h','#B','{'] [] -['j&;\'j]*+','D#){','L.-%4E',' 0',')P.','/^D.J{','P*43gf!','T,q','/)RJ.u#6'] [] +['~vYP/4f9.',':,7u.0',';e.<','^O,i','3'] +['!y1/Z\'5D'] +['&- KDN%>[','>-xM./ B','?+//','M,.71QR#_','~N'] +['z9P/%m','7q\'!k','Q%] #.*3','U:&XeP{*',',','s.3'] +['+k.=%','8\'nb=P','-uY ,h8(w','=\'W$','','m<+%l','<~+@ Vw'] - UUID -0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f -44e143fb-3c07-b672-52b6-c77fded30154 -1b0100ba-41e3-9baf-4b2c-59032b97438d 
-c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d -c7c5c206-d6ea-d1ef-54a4-c29ab73ed619 -7d9648e7-b8fa-00ad-5a13-478132d105b1 -24e89aa8-6739-a840-0e90-591c73068627 -5afb0980-c265-66eb-cdc7-222d6ac83015 -7047ebba-f7df-f0a7-a259-e3250e8fbe11 -10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9 +4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf +a84a8d61-8560-a0e4-4823-a36a537a8578 +7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8 +3c220c4b-fbd0-efb0-1367-168f02acd8b5 +dbcf3c3c-127c-bc78-d288-234e0a942946 +0d46b80d-bebc-93f2-04b8-1e7e84b1abed +8091ae6d-8194-3db0-cebc-17ea18786406 +b9a7064d-de99-e30b-ba87-32d2cd3e2dc3 +9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a +56678c86-2703-2fe1-2e52-bdaf2fce8366 - Array(Nullable(UUID)) -['0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f','44e143fb-3c07-b672-52b6-c77fded30154','1b0100ba-41e3-9baf-4b2c-59032b97438d','c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d','c7c5c206-d6ea-d1ef-54a4-c29ab73ed619','7d9648e7-b8fa-00ad-5a13-478132d105b1','24e89aa8-6739-a840-0e90-591c73068627','5afb0980-c265-66eb-cdc7-222d6ac83015','7047ebba-f7df-f0a7-a259-e3250e8fbe11'] -['10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9','323cab23-f465-1eee-562f-c1da2afd4d66','6f0314b2-a06f-deb0-8e03-5e0f1da0ae6d','28a84fa7-b60b-24bf-a043-7cd1c083cc88','27ad5c92-f7fe-61bf-0285-2e47472a2718','67857e29-dbeb-670b-1f07-cac0a9cdc39e'] -['6b8579f3-74ab-0d57-9a09-b07b86d292bf','ce640571-8c0e-4913-3740-8eb628977234','79bbddda-837d-bd45-a4ba-bc8afe81ab58','5df15c3c-cac7-71f1-8c6e-5769104c667e','183f05ef-3db8-e1e4-3de4-a20310057f10','6eaa9dc1-446a-6a7f-f27c-2e2858564bb4','7263e7f9-ae8b-1a89-e635-54f8f41e6edd','95e89689-b6e3-cd95-01ae-048a79733fdb'] -['1f76b889-254e-81b0-9d00-b9853e618fd8','bb150076-d3e7-5b20-c6a7-1c22b739ec3c','fa6a6867-620d-4241-29f1-913e585b2dfe','00aac064-82d7-23ad-0080-73268f4fe50f','dcd2f29b-6372-dd5d-a762-dea138dc0d76'] -['483707b1-a1e9-55cf-4705-0b748cbe15a2','95696803-f67d-78a2-368a-fa0468a6508a','05c19621-f1a0-0e84-6f41-0b3770eca9ab','859e6ea8-1d2c-a3e7-7f0c-47b4f3dcb8a8'] 
-['eae86216-0177-15bb-b3e7-11879fc325bc','9c92bccb-abf1-9455-bb7b-e6bdc168dd7c','8ce11561-8eb4-77f3-4a8a-94932decef27','d7c09609-7dca-1d83-0a0c-47a3e6fada0c','8d2ec2d8-21a0-c236-a7d8-ce7dba4efacf','b6e36c2a-9b64-c780-b8c2-3ad72f8c2100','311d94bb-98bd-2b9f-08f6-968caf310aa1','30b0db78-8f4e-d9ac-74a6-5569e114ec51'] -['085ad107-02f1-7923-bc64-7e42b4637d62','76ca80a3-4493-0e5d-d3ba-c269e6dcff7a','78b1fc1a-9617-6469-d4c1-e7400e855264','7c6ba877-1533-0897-43cb-dfa9f01faab2','5a93fb7a-59a4-0669-1bc0-3c084d8d91b5','3ed7d592-5110-ddc4-5b09-62d6e599ec08','30dabf5e-4bb4-4f6f-08a3-2da66221aa26','33ad19f4-5c15-0885-5ea7-468d774b73f2','26388262-e01d-1c58-df21-aec00d17a7d8'] +['4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf','a84a8d61-8560-a0e4-4823-a36a537a8578','7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8','3c220c4b-fbd0-efb0-1367-168f02acd8b5','dbcf3c3c-127c-bc78-d288-234e0a942946','0d46b80d-bebc-93f2-04b8-1e7e84b1abed','8091ae6d-8194-3db0-cebc-17ea18786406'] +['b9a7064d-de99-e30b-ba87-32d2cd3e2dc3'] +['9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a','56678c86-2703-2fe1-2e52-bdaf2fce8366','fe8b1ef4-86dd-23dc-0728-82a212b42c25'] [] -['0a1ce45d-7e7c-1295-6005-6cc519af1ef0','a8285719-a806-6ef2-9d05-27c5668dcf6b','c67242ca-cc6d-ae56-1702-8c45b5d6e2a3','751e5172-2cdd-d4de-0694-a5753a1f80ad','b6e0792c-3f87-80fa-dd3d-5e3bcf883eb7','2f10e4d6-feb0-c82a-8051-44ff033734dc','fdc53bbe-4279-2bc7-cf4d-11340b2d2ab4','4b44f8cf-353f-fee0-69d9-5785e7d7c760','61e7590b-e7d8-62fd-16b2-7e0636d3cbe3'] [] +['25e69006-a800-55de-8e3d-4a17f81a19a7','673ebe4e-af09-61af-86e4-70bca5481db2','c7d91dc9-0123-e8e9-8eb8-47fb80b35b93','6331c67c-7aba-2446-ddad-c3d24c8a985f','27db2f9b-92b4-220e-61c8-3f14833fe7d7'] +['70ce256d-7fca-cf75-359a-3155bba86b81'] +['7723aae6-820e-b583-c3b6-ac9887cab3f9','6438e07d-9a7a-4f96-4f45-222df77ea2d1','7824d4e5-6e37-2a1d-bfdf-8af768445673','ce4eab0b-b346-0363-6ec0-0116104b4d53','f86391cd-c2a3-d2a8-ea85-27ed3508504c'] 
+['b87d39f3-e56c-7128-7a66-4e516e2ce1c0','af25bac3-d662-673a-3516-022e687643ed','657c9997-3b29-f51c-7193-6a3b9a0c18eb','19bb38b5-6f97-a81c-2d56-57f189119a1a','0c7e416d-c669-dc04-1130-ff950fbbf44b','9705bc44-8d08-c734-6f47-8edcc1608a81'] +['7c20103f-659a-f845-399c-abdc8dc88ba0','bf479e85-1a0f-66c3-66fa-f6029e4ee2a8','f14af1a0-823c-b414-eb8e-e6b05b019868','ce353e45-2a9e-492c-1c54-d50459160ecf','1e232279-77ad-db7e-82f6-b4b3e30cdc2e','991111af-30a3-1ff7-e15a-023dfa0a8a6e','a749ef39-dc02-d05c-e8b2-129a7cccfd24'] - FixedString(4) -6LC -R`6 -P -hc -eO - -L -2m= -wKP -N +G +- +5S + T +W^ +-r +K +@ +8B +#H - String --CRz!2%=1 -)X$U*B -0?,$n_&! -e;P,& -{<*8 -CT"o98o+ -8)7+64&X\' +;\\Sm\'sH +T +@^1 -pX*997@,L +7-f)$ +9 +)&}y3 +w5>+\'@ ++g+N^g$ - -[127] 141277.1199 ('2026-10-04 10:37:26.199','0d7e88e8-2cd6-48a0-2bbf-d87513e97e4f') -[] 179109.9446 ('2032-05-19 12:49:55.908','44e143fb-3c07-b672-52b6-c77fded30154') -[] 12431.2908 ('2051-09-18 11:10:08.314','1b0100ba-41e3-9baf-4b2c-59032b97438d') -[54] 196857.2995 ('2008-08-07 20:05:10.749','c3ff2aa6-5ae3-c366-51d7-4fe2affc1e0d') -[] 108041.5314 ('2101-09-11 05:09:20.019','c7c5c206-d6ea-d1ef-54a4-c29ab73ed619') -[] -171632.9888 ('2062-07-10 05:18:58.924','7d9648e7-b8fa-00ad-5a13-478132d105b1') -[76] 210369.1749 ('2074-03-12 07:52:50.424','24e89aa8-6739-a840-0e90-591c73068627') -[] 121812.5110 ('2075-04-13 03:14:55.643','5afb0980-c265-66eb-cdc7-222d6ac83015') -[67] 77662.1019 ('1990-06-27 22:41:59.565','7047ebba-f7df-f0a7-a259-e3250e8fbe11') -[] -13911.9536 ('2071-06-01 23:38:13.679','10ce572a-fb11-6a67-15f8-2ae8a0d9bfb9') +[27] -119308.4417 ('1998-04-01 17:38:08.539','4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf') +[83] 46832.2781 ('1970-03-18 06:48:29.214','a84a8d61-8560-a0e4-4823-a36a537a8578') +[32] -129753.0274 ('1995-01-14 03:15:46.162','7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8') +[] -140799.4935 ('2004-01-17 09:12:08.895','3c220c4b-fbd0-efb0-1367-168f02acd8b5') +[] -132193.3267 ('1986-05-06 
01:18:11.938','dbcf3c3c-127c-bc78-d288-234e0a942946') +[84] -48819.7410 ('2101-05-10 09:27:05.563','0d46b80d-bebc-93f2-04b8-1e7e84b1abed') +[-29] 10417.8034 ('1995-11-08 18:51:19.096','8091ae6d-8194-3db0-cebc-17ea18786406') +[-27] -173562.5782 ('2013-02-27 03:31:50.404','b9a7064d-de99-e30b-ba87-32d2cd3e2dc3') +[] -161889.7195 ('2068-07-13 23:42:17.445','9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a') +[87] -127242.2918 ('2033-08-04 15:06:45.865','56678c86-2703-2fe1-2e52-bdaf2fce8366') - -[-59] 1234817989 aRM8 123481.7989 o 4.50418660252953e307 ('2106-02-07','2009-02-16 23:59:49','2007-02-20 10:43:46.989','4024497c-b1f5-5b35-44e7-4cf478103f02') Ų -[-78] 1171957426 C 117195.7426 w 4.838569580371409e307 ('2086-11-02','2007-02-20 10:43:46','2002-10-04 02:54:48.647','a4739e76-3fbd-b537-34b8-c2b4c272296f') -[-25,-72] 275100647 %5 27510.0647 w 1.1548186651907869e308 ('2096-02-04','1978-09-20 03:50:47','1974-04-19 01:48:12.192','1e8f333f-d6c7-cd48-7a23-dd0a93d8418f') ج -[] 1033685688 2?E 103368.5688 w 3.7022451508754407e307 ('2106-02-07','2002-10-04 02:54:48','2002-01-28 12:47:02.271','f8cf0cb7-8a30-acab-b618-22f5db288237') _ -[-40] 180895192 (g 18089.5192 h 2.1459523436508332e307 ('2013-05-07','1975-09-25 19:39:52','2053-11-20 07:10:58.662','fe7c4fab-b30e-0db2-fb21-64a726f71730') &R -[] 135557292 ;( 13555.7292 w 8.576968910623855e307 ('2048-12-21','1974-04-19 01:48:12','1986-04-08 19:07:15.849','dd091972-50eb-59db-242f-0c7311995279') A# -[-84,95] 716914271 [ 71691.4271 h 1.7471947254414614e308 ('2013-09-19','1992-09-19 18:51:11','2081-03-06 04:00:55.914','c87feb6e-decf-2422-14d3-3312e8eef7b3') -[22,38] 1012211222 L 101221.1222 h 1.2787095439887414e308 ('1991-02-02','2002-01-28 12:47:02','1979-01-20 20:39:20.939','da4f0e49-9267-2228-10d9-dcd46ccff515') H -[82,65] 2185722662 h/ -210924.4634 w 1.7870585909530327e308 ('2056-04-25','2039-04-06 20:11:02','2063-07-18 01:46:10.215','400d5020-f53c-224d-0e25-215f73801b7e') w -[35] 2647224658 (!f -164774.2638 o 1.763497936342361e308 
('2049-06-05','2053-11-20 07:10:58','1996-11-02 14:35:41.110','8a9107d8-e5ce-78a0-fa35-9dc406168d47') \r +[] 3608695403 ZL 109414.2847 h 2.2986075276244747e306 ('1985-05-10','2009-10-28 20:06:11','1993-01-03 17:51:52.981','b13ff007-c245-d737-85b2-1fa003e57127') . +[85] 4204173796 ], -199466.5471 h 1.1231803213254798e308 ('2075-04-03','1983-02-12 23:57:05','2060-06-06 20:15:08.751','a2f2cbf4-b11b-6976-7b91-14b6964acbe2') * +[-94,100] 32713522 8D$ 102255.5602 h 1.738807291208415e308 ('2029-07-12','2056-08-07 23:18:32','2081-01-25 13:13:30.589','445a77b5-0a27-3485-8dd8-c7cc35d2692f') +[] 4117557956 0b>+ 65942.4942 w 5.949505844751135e307 ('2048-03-05','2074-01-22 02:32:44','2073-12-04 05:05:06.955','c12095e6-b82c-d81c-4629-acd80e02b080')  +[] 1511604199 Il= -96352.6064 o 1.6472659147355216e308 ('2024-06-01','2024-12-26 00:54:40','2038-04-14 05:21:44.387','ebbbe70a-0321-ff18-89de-2bc9a9e4c454') Q +[-18] 2278197196 ~ 193977.7666 o 1.213689191969361e308 ('2060-10-04','1992-10-24 16:31:53','1983-06-10 08:51:48.294','805b0a62-9ada-a47e-2d5e-63cb5923549c') \t +[] 3761265784 N"(6 -59230.0369 o 1.2102282609858645e308 ('2106-02-07','2060-07-09 20:14:59','2007-03-17 04:51:09.288','429df3a4-ff18-28d5-9ad8-dcdd78e8b1ae') Y| +[] 66606254 6x&+ 130635.2269 o 1.1958868988757417e308 ('2088-10-07','2070-03-01 21:30:45','1978-05-22 14:28:52.523','d63c5cbb-9418-ce59-000c-056f88157bfa') у +[-27,-12] 4089193163 )+.8 -111081.7896 o 1.464035857434812e308 ('2106-02-07','2007-04-27 23:04:36','1987-07-21 04:32:01.821','2bc4860a-7214-300a-851e-b61011c346ef') # +[14,-43] 3638976325 #" 116961.4294 o 9.260305126207595e307 ('2042-06-11','2087-12-28 00:21:16','2071-04-01 21:44:13.058','c00d218a-913f-b657-1ff9-99927741f7ab') Fx - From 74d7d95d3414c35c944ba3ccc50643f9ac2c8f73 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 3 Mar 2020 15:01:25 +0300 Subject: [PATCH 109/215] fixup --- dbms/tests/performance/array_element.xml | 6 +++--- dbms/tests/performance/base64.xml | 1 - 2 files 
changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/tests/performance/array_element.xml b/dbms/tests/performance/array_element.xml index 1040c33ddbf..f4a33810fdd 100644 --- a/dbms/tests/performance/array_element.xml +++ b/dbms/tests/performance/array_element.xml @@ -8,7 +8,7 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) - SELECT count() FROM numbers(1000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) - SELECT count() FROM numbers(1000000) WHERE NOT ignore([[], []][number % 2 + 2]) + SELECT count() FROM numbers(10000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) + SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) + SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], []][number % 2 + 2]) diff --git a/dbms/tests/performance/base64.xml b/dbms/tests/performance/base64.xml index dbf8e0dc981..232b4d20ba3 100644 --- a/dbms/tests/performance/base64.xml +++ b/dbms/tests/performance/base64.xml @@ -24,7 +24,6 @@ table numbers(10000000) - numbers_mt(100000000) From d0bcbf758160c48df11832a8ae6a90e47dbd0cae Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 15:51:41 +0300 Subject: [PATCH 110/215] Review and style fixes --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 1 - dbms/src/Storages/ColumnsDescription.cpp | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 2aa2810dbc9..850260dea91 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -62,7 +62,6 @@ namespace ErrorCodes extern const int UNKNOWN_DATABASE_ENGINE; extern const int DUPLICATE_COLUMN; extern const int DATABASE_ALREADY_EXISTS; - extern const int THERE_IS_NO_DEFAULT_VALUE; extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int DICTIONARY_ALREADY_EXISTS; 
diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index fb03d2c375c..2b2281c9663 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -430,7 +431,7 @@ ColumnsDescription ColumnsDescription::parse(const String & str) Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context) { for (const auto & child : default_expr_list->children) - if (child->as() || child->as()) + if (child->as() || child->as() || child->as()) throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); try From f1336592857ae651973db5207569a81086107b8d Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Tue, 3 Mar 2020 17:20:08 +0300 Subject: [PATCH 111/215] CLICKHOUSEDOCS-548: Added some adopters to the list. --- docs/en/introduction/adopters.md | 139 +++++++++++++++++- .../query_language/agg_functions/reference.md | 4 +- .../query_language/agg_functions/reference.md | 22 +-- 3 files changed, 151 insertions(+), 14 deletions(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 1cc85c3f881..edfc774f7b6 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -10,22 +10,159 @@ | [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | | [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | | [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| 
[CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | | [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| [Cisco](http://public.web.cern.ch/public/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | | [Citadel Securities](https://www.citadelsecurities.com/) | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| [Citymobil](https://city-mobil.ru) | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | | [ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019 ](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| [Deutsche Bank](db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | | [Exness](https://www.exness.com) | Trading | 
Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| [Idealista](www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | | [LifeStreet](https://cloudflare.com) | Ad network | Main product | — | — | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | --- | --- | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | | [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | | [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| [scireum GmbH](https://www.scireum.de/) | e-Commerce | ??? 
| — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | | [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | | [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/ml.pdf) | | [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | | [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | | [Yandex Metrica](https://metrica.yandex.com) | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| --- | --- | --- | --- | --- | --- | +| --- | --- | --- | --- | --- | --- | + +### Not checked mentions + +- Bioinformatics - evolutionary genetics: +https://github.com/msestak/FindOrigin + +"We are exploring evolution of novel genes in genomes because if seems that genomes are far from being static as previously believed and what actually happens is that new genes are constantly being added and old genes are lost." 
+ +- Search engine and analytics for Bitcoin transactions: +https://blockchair.com/ + +"We have quite large tables on just single server and everything works really fast &mdash with any filters and sorting everything is processed just instantly." + +- https://www.octonica.ru/ презентация https://github.com/ClickHouse/clickhouse-presentations/blob/master/database_saturday_2018_2/octonica/meetup.pptx + + + +### Stash + +- [СБИС, Тензор](https://sbis.ru/about), Analytics. В своих вакансиях регулярно указывают ClickHouse + есть задание для студентов 2018 https://tensor.ru/stipend2018/projects Active Manager - Анализатор активности на странице, где подразумевается использование ClickHouse. + +- Компания Republer https://republer.com/ О том, что они используют ClickHouse видно из вакансии https://telegram.me/nodejs_jobs_feed/607 + +- Kaspersky заявлен в нескольких презентациях, Однако живых публичных источников найти не получается. Поисковики приводят на сайт вакансий Касперского, но в явном виде ClickHouse там нигде не появляется. Есть https://careers.kaspersky.ru/tech/, поиск, на котором приводит на вакансию девопса, при этом в описании вакансии ClickHouse не упоминается. Вот такая штука есть https://sudonull.com/post/76060-The-second-mitap-of-the-Rust-community-in-Kaspersky-Lab-Kaspersky-Labs-blog + +- Мегафон. На сайтах вакансий от него есть несколько вакансий с ClickHouse типа https://ekaterinburg.hh.ru/vacancy/35891497?utm_source=jooble.org&utm_medium=meta&utm_campaign=RU_paid_cpc_applicant_feed_magic1 и есть вот такая хрень https://newprolab.com/ru/dataengineer-megafon, с которой непонятно, что делать. Вакансии не кажутся хоть сколько-нибудь надёжным источником, поскольку сейчас есть - завтра нет. Как у касперского. + + +- [Quantrum.Team](https://quantrum.me). Непонятное комьюнити трейдеров, один из которых решил попробовать ClickHouse. 
https://quantrum.me/1709-clickhouse-i-python-dlya-xraneniya-istorii-cen/ + +- https://severalnines.com/database-blog/hybrid-oltpanalytics-database-workloads-replicating-mysql-data-clickhouse какая-то компания типа альтинити видимо. + +- В презентациях есть Wikimedia Foundation, но реальных упоминаний об этом не нашел. Есть какой-то ответ в блоге https://www.mail-archive.com/analytics@lists.wikimedia.org/msg03619.html, но он не указывает прямо на использование ClickHouse. Вот этот вот указывает, но я с ходу не разобрал, что за источник вообще такой https://phabricator.wikimedia.org/T150343 + +- [Mercadona](mercadona.com) не нашел ни единой связи с ClickHouse. + +- [Zara](zara.com) не нашел связи с ClickHouse. + +- ByteDance, единственное прямое доказательство было ссылкой на уже не существующую вакансию. + [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) + + + + +- bdtc_2019 ++ cern (Mail.ru, MGID,) +- cpp_russia_2019 +- cpp_sprint_2019 +- data_at_scale +- database_saturday_2018 +- database_saturday_2018_2/pipeline ++ database_saturday_2018_2/octonica +- database_saturday_2019 ++ dataops_2019 (CARTO, Mercadona, Zara, Idealista, Corunet, ... Cloudflare, Spotify, Amadeus, Bloomberg, Cisco, Deutsche Bank, Tencent, ByteDance) +drafts yandex/ClickHouse -> ClickHouse/ClickHouse, part 2 5 months ago +evolution Correction on KuaiShou company name 3 months ago +group_by Changed tabs to spaces in code [#CLICKHOUSE-3]. 
3 years ago +hash_tables yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +highload2016 Added historical presentation from HighLoad 2016 11 days ago +highload2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +highload2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +highload2019 Added presentation from HighLoad++ 2019 4 months ago +highload_siberia_2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +highload_siberia_2019 Added another presentation from HighLoad Siberia 2019 8 months ago +highload_spb_2019 Added presentation from Saint Highload 2019 11 months ago +hse_2019 Added one more presentation from HSE 8 months ago +internals add sources for the internals presentation 2 years ago +it_rzn_2019 Updated presentation from IT Rzn 3 months ago +meetup10 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup11 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup12 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup13 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup14 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup15 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup16 Added all presentations from Summer Berlin Meetup 2 years ago +meetup17 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup18 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup19 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup20 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup21 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup22 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup23 Added presentations from ClickHouse Meetup in San Francisco 9 months ago +meetup24 yandex/ClickHouse -> ClickHouse/ClickHouse, part 2 5 months ago +meetup25 Add lost slides from Novosibirsk 8 months ago 
+meetup26 add one more slide deck from Minsk meetup 8 months ago +meetup27 Added more presentations from 27th meetup. 7 months ago +meetup28 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup29 Merge branch 'master' of github.com:ClickHouse/clickhouse-presentations 5 months ago +meetup3 Presentation: added analysts part [#CLICKHOUSE-2]. 3 years ago +meetup30 Add more slides from Paris Meetup 4 months ago +meetup31 Correction on KuaiShou company name 3 months ago +meetup32 Correction on KuaiShou company name 3 months ago +meetup33 Correction on KuaiShou company name 3 months ago +meetup34 Add tokyo meetup 3 months ago +meetup35 Added half of presentations from ClickHouse Instanbul Meetup 3 months ago +meetup36/new_features Added a presentation from 36th ClickHouse Meetup 3 months ago +meetup37 Moved presentation from 37th ClickHouse Meetup 3 months ago +meetup38 Remaining Moscow meetup slides 3 months ago +meetup39 Remaining slides from SF meetup 14 days ago +meetup4 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup40 More slides from NYC meetup 19 days ago +meetup5 Added presentation from 5th meetup [#CLICKHOUSE-3]. 3 years ago +meetup6 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup7 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +meetup9 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +misc Added resized images for "evolution" article. 
3 years ago +percona2017 percona2017 3 years ago +percona2018 Added part of presentations from Percona 2018 and Sunnyvale Meetup 2 years ago +percona2019 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +percona_europe_2017 add presentation from Percona Europe 2017 2 years ago +percona_europe_2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +percona_europe_2019 Merge branch 'master' of github.com:ClickHouse/clickhouse-presentations 5 months ago +pgday2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +rit2017 add sources for RIT++2017 3 years ago +rit2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +rit2019 Add rit2019 presentation 8 months ago +roadmap2018 Added roadmap for 2018..2019 15 months ago +shad2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +tbd yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +tutorials Create catboost_with_clickhouse_en.md 2 years ago +unknown_developers_reissue Added slightly modified version of "Unknown developers" presentation 15 months ago +uwdc yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +yatalks_2019 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago +yatalks_2019_moscow Fixed error in presentation from YaTalks 2019 in Moscow 3 months ago diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 725ef3a4f62..7c099c26580 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -1034,7 +1034,7 @@ Alias: `medianExactWeighted`. - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. **Returned value** @@ -1300,7 +1300,7 @@ Result: ## quantileTDigestWeighted {#quantiletdigestweighted} -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence number. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The performance of the function is lower than performance of [quantile](#quantile) or [quantileTiming](#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index 0367e15ba3a..f4ee9cffad2 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -860,7 +860,7 @@ FROM t quantile(level)(expr) ``` -Альяс: `median`. +Алиас: `median`. **Параметры** @@ -870,7 +870,7 @@ quantile(level)(expr) **Возвращаемое значение** -- Приблизительную квантиль заданного уровня. +- Приблизительный квантиль заданного уровня. 
Тип: @@ -926,7 +926,7 @@ SELECT quantile(val) FROM t quantileDeterministic(level)(expr, determinator) ``` -Альяс: `medianDeterministic`. +Алиас: `medianDeterministic`. **Параметры** @@ -936,7 +936,7 @@ quantileDeterministic(level)(expr, determinator) **Возвращаемое значение** -- Приблизительную квантиль заданного уровня. +- Приблизительный квантиль заданного уровня. Тип: @@ -993,7 +993,7 @@ SELECT quantileDeterministic(val, 1) FROM t quantileExact(level)(expr) ``` -Альяс: `medianExact`. +Алиас: `medianExact`. **Параметры** @@ -1046,7 +1046,7 @@ SELECT quantileExact(number) FROM numbers(10) quantileExactWeighted(level)(expr, weight) ``` -Альяс: `medianExactWeighted`. +Алиас: `medianExactWeighted`. **Параметры** @@ -1110,7 +1110,7 @@ SELECT quantileExactWeighted(n, val) FROM t quantileTiming(level)(expr) ``` -Альяс: `medianTiming`. +Алиас: `medianTiming`. **Параметры** @@ -1192,7 +1192,7 @@ SELECT quantileTiming(response_time) FROM t quantileTimingWeighted(level)(expr, weight) ``` -Альяс: `medianTimingWeighted`. +Алиас: `medianTimingWeighted`. **Параметры** @@ -1276,7 +1276,7 @@ SELECT quantileTimingWeighted(response_time, weight) FROM t quantileTDigest(level)(expr) ``` -Альяс: `medianTDigest`. +Алиас: `medianTDigest`. **Параметры** @@ -1333,7 +1333,7 @@ SELECT quantileTDigest(number) FROM numbers(10) quantileTDigestWeighted(level)(expr, weight) ``` -Альяс: `medianTDigest`. +Алиас: `medianTDigest`. **Параметры** @@ -1343,7 +1343,7 @@ quantileTDigestWeighted(level)(expr, weight) **Возвращаемое значение** -- Приблизительную квантиль заданного уровня. +- Приблизительный квантиль заданного уровня. 
Тип: From d19e21ba19bd6fa60506c7d28a7678269d1c6660 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 17:25:28 +0300 Subject: [PATCH 112/215] Default columns which depends on aliases --- dbms/src/Core/BlockInfo.cpp | 9 ++++++ dbms/src/Core/BlockInfo.h | 3 ++ .../AddingDefaultsBlockInputStream.cpp | 11 ++++++- .../Interpreters/evaluateMissingDefaults.cpp | 15 ++++++++-- .../01084_defaults_on_aliases.reference | 6 ++++ .../0_stateless/01084_defaults_on_aliases.sql | 30 +++++++++++++++++++ .../01091_insert_with_default_json.reference | 2 ++ .../01091_insert_with_default_json.sql | 26 ++++++++++++++++ 8 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01084_defaults_on_aliases.reference create mode 100644 dbms/tests/queries/0_stateless/01084_defaults_on_aliases.sql create mode 100644 dbms/tests/queries/0_stateless/01091_insert_with_default_json.reference create mode 100644 dbms/tests/queries/0_stateless/01091_insert_with_default_json.sql diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index b76c54cc2f2..78ee165bad1 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -74,4 +74,13 @@ const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(s return none; } +bool BlockMissingValues::hasDefaultBits(size_t column_idx) const +{ + auto it = rows_mask_by_column_id.find(column_idx); + if (it == rows_mask_by_column_id.end()) + return false; + + const auto & col_mask = it->second; + return std::find(col_mask.begin(), col_mask.end(), true) != col_mask.end(); +} } diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index 32a09d8cf70..55b57aa77ae 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -51,7 +51,10 @@ class BlockMissingValues public: using RowsBitMask = std::vector; /// a bit per row for a column + /// Get mask for column, column_idx is index inside corresponding block const RowsBitMask & 
getDefaultsBitmask(size_t column_idx) const; + /// Check that we have to replace default value at least in one of columns + bool hasDefaultBits(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); bool empty() const { return rows_mask_by_column_id.empty(); } size_t size() const { return rows_mask_by_column_id.size(); } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 112afe61183..bf72ef27918 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -56,11 +56,20 @@ Block AddingDefaultsBlockInputStream::readImpl() if (block_missing_values.empty()) return res; + /// res block alredy has all columns values, with default value for type + /// (not value specified in table). We identify which columns we need to + /// recalculate with help of block_missing_values. Block evaluate_block{res}; /// remove columns for recalculation for (const auto & column : column_defaults) + { if (evaluate_block.has(column.first)) - evaluate_block.erase(column.first); + { + size_t column_idx = res.getPositionByName(column.first); + if (block_missing_values.hasDefaultBits(column_idx)) + evaluate_block.erase(column.first); + } + } if (!evaluate_block.columns()) evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared(), "_dummy"}); diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index d2907c0f0e4..c46a920726f 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -27,10 +28,20 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi const auto it = column_defaults.find(column.name); - /// expressions must be cloned to prevent modification by the 
ExpressionAnalyzer if (it != column_defaults.end()) { - auto cast_func = makeASTFunction("CAST", it->second.expression->clone(), std::make_shared(column.type->getName())); + /// expressions must be cloned to prevent modification by the ExpressionAnalyzer + auto column_default_expr = it->second.expression->clone(); + RequiredSourceColumnsVisitor::Data columns_context; + RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr); + NameSet required_columns_names = columns_context.requiredColumns(); + + for (const auto & required_column_name : required_columns_names) + if (auto rit = column_defaults.find(required_column_name); + rit != column_defaults.end() && rit->second.kind == ColumnDefaultKind::Alias) + default_expr_list->children.emplace_back(setAlias(rit->second.expression->clone(), required_column_name)); + + auto cast_func = makeASTFunction("CAST", column_default_expr, std::make_shared(column.type->getName())); default_expr_list->children.emplace_back(setAlias(cast_func, it->first)); } } diff --git a/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.reference b/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.reference new file mode 100644 index 00000000000..9b39b07db94 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.reference @@ -0,0 +1,6 @@ +1 1 +1 1 1 +2 2 4 +2 2 2 4 +3 3 9 +3 3 3 9 27 diff --git a/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.sql b/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.sql new file mode 100644 index 00000000000..2e4be37cc73 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01084_defaults_on_aliases.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS table_with_defaults_on_aliases; + +CREATE TABLE table_with_defaults_on_aliases (col1 UInt32, col2 ALIAS col1, col3 DEFAULT col2) Engine = MergeTree() ORDER BY tuple(); + +INSERT INTO table_with_defaults_on_aliases (col1) VALUES (1); + +SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 1; + +SELECT col1, 
col2, col3 FROM table_with_defaults_on_aliases WHERE col1 = 1; + +ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col4 UInt64 DEFAULT col2 * col3; + +INSERT INTO table_with_defaults_on_aliases (col1) VALUES (2); + +SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 2; + +SELECT col1, col2, col3, col4 FROM table_with_defaults_on_aliases WHERE col1 = 2; + +ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col5 UInt64 ALIAS col2 * col4; + +INSERT INTO table_with_defaults_on_aliases (col1) VALUES (3); + +SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 3; + +SELECT col1, col2, col3, col4, col5 FROM table_with_defaults_on_aliases WHERE col1 = 3; + + +ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col6 UInt64 MATERIALIZED col2 * col4; + +DROP TABLE IF EXISTS table_with_defaults_on_aliases; diff --git a/dbms/tests/queries/0_stateless/01091_insert_with_default_json.reference b/dbms/tests/queries/0_stateless/01091_insert_with_default_json.reference new file mode 100644 index 00000000000..8e9a3516771 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01091_insert_with_default_json.reference @@ -0,0 +1,2 @@ +0 0 test0 +ClickHouse is great ClickHouse is fast diff --git a/dbms/tests/queries/0_stateless/01091_insert_with_default_json.sql b/dbms/tests/queries/0_stateless/01091_insert_with_default_json.sql new file mode 100644 index 00000000000..d8b998cdd5e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01091_insert_with_default_json.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS table_with_complex_default; + +CREATE TABLE table_with_complex_default (i Int8, n UInt8 DEFAULT 42, s String DEFAULT concat('test', CAST(n, 'String'))) ENGINE=TinyLog; + +INSERT INTO table_with_complex_default FORMAT JSONEachRow {"i":0, "n": 0} + +SELECT * FROM table_with_complex_default; + +DROP TABLE IF EXISTS table_with_complex_default; + +DROP TABLE IF EXISTS test_default_using_alias; + +CREATE TABLE test_default_using_alias +( + what String, + a String DEFAULT 
concat(c, ' is great'), + b String DEFAULT concat(c, ' is fast'), + c String ALIAS concat(what, 'House') +) +ENGINE = TinyLog; + +INSERT INTO test_default_using_alias(what) VALUES ('Click'); + +SELECT a, b FROM test_default_using_alias; + +DROP TABLE IF EXISTS test_default_using_alias; From 720d9e287a06087b7a95a59747d250599ca9cfed Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 17:27:43 +0300 Subject: [PATCH 113/215] More comments --- dbms/src/Interpreters/evaluateMissingDefaults.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index c46a920726f..a33fc4617a7 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -32,14 +32,23 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi { /// expressions must be cloned to prevent modification by the ExpressionAnalyzer auto column_default_expr = it->second.expression->clone(); + + /// Our default may depend on columns with ALIAS as default expr which not present in block + /// we can easily add them from column_defaults struct RequiredSourceColumnsVisitor::Data columns_context; RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr); NameSet required_columns_names = columns_context.requiredColumns(); for (const auto & required_column_name : required_columns_names) + { + /// If we have such default column and it's alias than we should + /// add it into default_expression_list if (auto rit = column_defaults.find(required_column_name); rit != column_defaults.end() && rit->second.kind == ColumnDefaultKind::Alias) + { default_expr_list->children.emplace_back(setAlias(rit->second.expression->clone(), required_column_name)); + } + } auto cast_func = makeASTFunction("CAST", column_default_expr, std::make_shared(column.type->getName())); 
default_expr_list->children.emplace_back(setAlias(cast_func, it->first)); From 3554db185e44cab2fc5a8ba8d28a3aa8d184f02a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 17:43:54 +0300 Subject: [PATCH 114/215] Simplify aliases in test --- .../00385_storage_file_and_clickhouse-local_app.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh index 0d4abee9a06..a6eaa603b7f 100755 --- a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh +++ b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh @@ -48,14 +48,8 @@ echo echo ${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default ( - a UInt8 DEFAULT - ( - SELECT number FROM system.numbers LIMIT 3,1 - ), - b UInt8 ALIAS - ( - SELECT dummy+9 FROM system.one - ), + a UInt8 DEFAULT 3, + b UInt8 ALIAS a + 5, c UInt8 ) ENGINE = Memory; SELECT count() FROM system.tables WHERE name='sophisticated_default';" From 992bff97227bfc96dc9b3d5b9b05eea6acde55cf Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Mar 2020 17:43:54 +0300 Subject: [PATCH 115/215] Simplify aliases in test --- .../00385_storage_file_and_clickhouse-local_app.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh index 0d4abee9a06..a6eaa603b7f 100755 --- a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh +++ b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh @@ -48,14 +48,8 @@ echo echo ${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default ( - a UInt8 DEFAULT - ( - SELECT number FROM system.numbers LIMIT 3,1 - ), - b UInt8 ALIAS - ( - SELECT dummy+9 FROM system.one - ), + a 
UInt8 DEFAULT 3, + b UInt8 ALIAS a + 5, c UInt8 ) ENGINE = Memory; SELECT count() FROM system.tables WHERE name='sophisticated_default';" From 0d252082372939474353c8eab4b80ba03fe987d8 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 4 Mar 2020 07:58:44 +0100 Subject: [PATCH 116/215] Prevent losing data in Kafka when an exception happens after read suffix, but before commit. Fixes #9378 --- .../Storages/Kafka/KafkaBlockInputStream.cpp | 4 +- .../integration/test_storage_kafka/test.py | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 80efc9f4996..c1252086b1d 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -199,8 +199,6 @@ Block KafkaBlockInputStream::readImpl() void KafkaBlockInputStream::readSuffixImpl() { - broken = false; - if (commit_in_suffix) commit(); } @@ -211,6 +209,8 @@ void KafkaBlockInputStream::commit() return; buffer->commit(); + + broken = false; } } diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 8c4f2fbc9ef..0a7cfd47e4e 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -7,6 +7,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV from helpers.client import QueryRuntimeException +from helpers.network import PartitionManager import json import subprocess @@ -34,6 +35,7 @@ instance = cluster.add_instance('instance', config_dir='configs', main_configs=['configs/kafka.xml', 'configs/log_conf.xml' ], with_kafka=True, + with_zookeeper=True, clickhouse_path_dir='clickhouse_path') kafka_id = '' @@ -1106,6 +1108,57 @@ def test_kafka_rebalance(kafka_cluster): assert result == 1, 'Messages from kafka get duplicated!' 
+@pytest.mark.timeout(1200) +def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): + messages = [json.dumps({'key': j+1, 'value': 'x' * 300}) for j in range(22)] + kafka_produce('no_holes_when_write_suffix_failed', messages) + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'no_holes_when_write_suffix_failed', + kafka_group_name = 'no_holes_when_write_suffix_failed', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 20; + + CREATE TABLE test.view (key UInt64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/no_holes_when_write_suffix_failed', 'node1') + ORDER BY key; + + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka + WHERE NOT sleepEachRow(1); + ''') + # the tricky part here is that disconnect should happen after write prefix, but before write suffix + # so i use sleepEachRow + with PartitionManager() as pm: + time.sleep(12) + pm.drop_instance_zk_connections(instance) + time.sleep(20) + pm.heal_all + + # connection restored and it will take a while until next block will be flushed + time.sleep(40) + + # as it's a bit tricky to hit the proper moment - let's check in logs if we did it correctly + assert instance.contains_in_log("ZooKeeper session has been expired.: while write prefix to view") + + result = instance.query('SELECT count(), uniqExact(key), max(key) FROM test.view') + print(result) + + # kafka_cluster.open_bash_shell('instance') + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + assert TSV(result) == TSV('22\t22\t22') + if __name__ == '__main__': From 8adcbc795264dfee2d9b1be9cae47bc378a44757 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 3 Mar 2020 22:01:20 +0300 Subject: [PATCH 117/215] Send progress after final update (like logs) Otherwise you will 
get incomplete progress on TCP; HTTP is not affected. The final updateProgress is called from executeQueryImpl::finish_callback: (gdb) bt 0 DB::TCPHandler::updateProgress (this=0x7fffb581e000, value=...) at TCPHandler.cpp:1178 1 0x00007ffff7c62038 in DB::TCPHandler::::operator()(const DB::Progress &) const (...) at TCPHandler.cpp:127 4 0x00007fffe9a86671 in DB::::operator()(DB::IBlockInputStream *, DB::IBlockOutputStream *) (__closure=0x7fffb588f300, stream_in=0x7fffb5800290, stream_out=0x0) at executeQuery.cpp:450 5 0x00007fffe9a8b948 in std::_Function_handler >::_M_invoke(const std::_Any_data &, DB::IBlockInputStream *&&, DB::IBlockOutputStream *&&) (...) at std_function.h:300 6 0x00007ffff7c7482f in std::function<>::operator()(DB::IBlockInputStream*, DB::IBlockOutputStream*) const (..) at std_function.h:688 7 0x00007ffff7c6f82b in DB::BlockIO::onFinish (this=0x7fffb5820738) at BlockIO.h:43 8 0x00007ffff7c65eb0 in DB::TCPHandler::processOrdinaryQuery (this=0x7fffb581e000) at TCPHandler.cpp:540 9 0x00007ffff7c63a50 in DB::TCPHandler::runImpl (this=0x7fffb581e000) at TCPHandler.cpp:269 10 0x00007ffff7c6a6fd in DB::TCPHandler::run (this=0x7fffb581e000) at TCPHandler.cpp:1226 ... 
--- dbms/programs/server/TCPHandler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 8fb3c2c6c76..dbb14562722 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -538,6 +538,8 @@ void TCPHandler::processOrdinaryQuery() } state.io.onFinish(); + + sendProgress(); } @@ -657,6 +659,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads) } state.io.onFinish(); + + sendProgress(); } From f0a5985e4cb9d6f4e6da4a95b5af55e09ad48cde Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 4 Mar 2020 11:53:48 +0300 Subject: [PATCH 118/215] fix query in perf test --- .../performance/generate_table_function.xml | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml index bca2c53d74d..4674b81af99 100644 --- a/dbms/tests/performance/generate_table_function.xml +++ b/dbms/tests/performance/generate_table_function.xml @@ -8,23 +8,23 @@ - SELECT COUNT(*) FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000; - SELECT COUNT(*) FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 
,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i UUID', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000; - SELECT COUNT(*) FROM generate('i String', 10, 10, 1) LIMIT 100000; + SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) 
LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i UUID', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000); + SELECT COUNT(*) FROM (SELECT * FROM generate('i String', 10, 10, 1) LIMIT 100000); From 2bd6474379015cb131d1d20b8170bfedad95f8a0 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 9 Jan 2020 17:50:34 +0300 Subject: [PATCH 119/215] Adding storage policies and disks while reloading configuration. 
--- dbms/programs/server/Server.cpp | 2 + dbms/src/Disks/DiskSpaceMonitor.cpp | 116 +++++++++++++++- dbms/src/Disks/DiskSpaceMonitor.h | 26 +++- dbms/src/Interpreters/Context.cpp | 51 ++++--- dbms/src/Interpreters/Context.h | 14 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 127 ++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 12 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +- .../MergeTree/MergeTreePartsMover.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 4 +- .../Storages/StorageReplicatedMergeTree.cpp | 4 +- .../Storages/System/StorageSystemDisks.cpp | 2 +- .../System/StorageSystemStoragePolicies.cpp | 2 +- 14 files changed, 250 insertions(+), 120 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c54ca43c219..6e6d997d7ed 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -470,6 +470,8 @@ int Server::main(const std::vector & /*args*/) if (config->has("max_partition_size_to_drop")) global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop")); + + global_context->updateStorageConfiguration(*config); }, /* already_loaded = */ true); diff --git a/dbms/src/Disks/DiskSpaceMonitor.cpp b/dbms/src/Disks/DiskSpaceMonitor.cpp index 40f208edd7d..9bbd7907d69 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.cpp +++ b/dbms/src/Disks/DiskSpaceMonitor.cpp @@ -7,6 +7,7 @@ #include #include + #include @@ -48,7 +49,65 @@ DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, con } -const DiskPtr & DiskSelector::operator[](const String & name) const +DiskSelectorPtr DiskSelector::updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) const +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + auto & factory = DiskFactory::instance(); + + std::shared_ptr result = 
std::make_shared(*this); + + std::set old_disks_minus_new_disks; + for (const auto & [disk_name, _] : result->disks) + { + old_disks_minus_new_disks.insert(disk_name); + } + + for (const auto & disk_name : keys) + { + if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) + throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + + if (result->disks.count(disk_name) == 0) + { + auto disk_config_prefix = config_prefix + "." + disk_name; + result->disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context)); + } + else + { + old_disks_minus_new_disks.erase(disk_name); + + /// TODO: Ideally ClickHouse shall complain if disk has changed, but + /// implementing that may appear as not trivial task. + } + } + + if (!old_disks_minus_new_disks.empty()) + { + WriteBufferFromOwnString warning; + if (old_disks_minus_new_disks.size() == 1) + writeString("Disk ", warning); + else + writeString("Disks ", warning); + + int index = 0; + for (const String & name : old_disks_minus_new_disks) + { + if (index++ > 0) + writeString(", ", warning); + writeBackQuotedString(name, warning); + } + + writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning); + LOG_WARNING(&Logger::get("DiskSelector"), warning.str()); + } + + return result; +} + + +DiskPtr DiskSelector::get(const String & name) const { auto it = disks.find(name); if (it == disks.end()) @@ -61,7 +120,7 @@ Volume::Volume( String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - const DiskSelector & disk_selector) + DiskSelectorPtr disk_selector) : name(std::move(name_)) { Poco::Util::AbstractConfiguration::Keys keys; @@ -74,7 +133,7 @@ Volume::Volume( if (startsWith(disk, "disk")) { auto disk_name = config.getString(config_prefix + "." 
+ disk); - disks.push_back(disk_selector[disk_name]); + disks.push_back(disk_selector->get(disk_name)); } } @@ -162,7 +221,7 @@ StoragePolicy::StoragePolicy( String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - const DiskSelector & disks) + DiskSelectorPtr disks) : name(std::move(name_)) { String volumes_prefix = config_prefix + ".volumes"; @@ -330,6 +389,28 @@ ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const } +void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const +{ + std::unordered_set new_volume_names; + for (const auto & volume : new_storage_policy->getVolumes()) + new_volume_names.insert(volume->getName()); + + for (const auto & volume : getVolumes()) + { + if (new_volume_names.count(volume->getName()) == 0) + throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::LOGICAL_ERROR); + + std::unordered_set new_disk_names; + for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks) + new_disk_names.insert(disk->getName()); + + for (const auto & disk : volume->disks) + if (new_disk_names.count(disk->getName()) == 0) + throw Exception("New storage policy shall contain disks of old one", ErrorCodes::LOGICAL_ERROR); + } +} + + size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const { for (size_t i = 0; i < volumes.size(); ++i) @@ -346,7 +427,7 @@ size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const StoragePolicySelector::StoragePolicySelector( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - const DiskSelector & disks) + DiskSelectorPtr disks) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -368,18 +449,39 @@ StoragePolicySelector::StoragePolicySelector( /// Add default policy if it's not specified explicetly if (policies.find(default_storage_policy_name) == policies.end()) { - auto default_volume = 
std::make_shared(default_volume_name, std::vector{disks[default_disk_name]}, 0); + auto default_volume = std::make_shared(default_volume_name, std::vector{disks->get(default_disk_name)}, 0); auto default_policy = std::make_shared(default_storage_policy_name, Volumes{default_volume}, 0.0); policies.emplace(default_storage_policy_name, default_policy); } } -const StoragePolicyPtr & StoragePolicySelector::operator[](const String & name) const + +StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks) const +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + std::shared_ptr result = std::make_shared(config, config_prefix, disks); + + for (const auto & [name, policy] : policies) + { + if (result->policies.count(name) == 0) + throw Exception("Storage policy " + backQuote(name) + " is missing in new configuration", ErrorCodes::BAD_ARGUMENTS); + + policy->checkCompatibleWith(result->policies[name]); + } + + return result; +} + + +StoragePolicyPtr StoragePolicySelector::get(const String & name) const { auto it = policies.find(name); if (it == policies.end()) throw Exception("Unknown StoragePolicy " + name, ErrorCodes::UNKNOWN_POLICY); + return it->second; } diff --git a/dbms/src/Disks/DiskSpaceMonitor.h b/dbms/src/Disks/DiskSpaceMonitor.h index f59b5e164d3..e3382dc03d1 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.h +++ b/dbms/src/Disks/DiskSpaceMonitor.h @@ -17,15 +17,21 @@ namespace DB { +class DiskSelector; +using DiskSelectorPtr = std::shared_ptr; + /// Parse .xml configuration and store information about disks /// Mostly used for introspection. 
class DiskSelector { public: DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context); + DiskSelector(const DiskSelector & from): disks(from.disks) {} + + DiskSelectorPtr updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) const; /// Get disk by name - const DiskPtr & operator[](const String & name) const; + DiskPtr get(const String & name) const; /// Get all disks with names const auto & getDisksMap() const { return disks; } @@ -54,7 +60,7 @@ public: String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - const DiskSelector & disk_selector); + DiskSelectorPtr disk_selector); /// Next disk (round-robin) /// @@ -87,6 +93,8 @@ private: using VolumePtr = std::shared_ptr; using Volumes = std::vector; +class StoragePolicy; +using StoragePolicyPtr = std::shared_ptr; /** * Contains all information about volumes configuration for Storage. @@ -95,7 +103,7 @@ using Volumes = std::vector; class StoragePolicy { public: - StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DiskSelector & disks); + StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks); StoragePolicy(String name_, Volumes volumes_, double move_factor_); @@ -146,6 +154,9 @@ public: return getVolume(it->second); } + /// Checks if storage policy can be replaced by another one. + void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const; + private: Volumes volumes; const String name; @@ -158,17 +169,20 @@ private: }; -using StoragePolicyPtr = std::shared_ptr; +class StoragePolicySelector; +using StoragePolicySelectorPtr = std::shared_ptr; /// Parse .xml configuration and store information about policies /// Mostly used for introspection. 
class StoragePolicySelector { public: - StoragePolicySelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DiskSelector & disks); + StoragePolicySelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks); + + StoragePolicySelectorPtr updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks) const; /// Policy by name - const StoragePolicyPtr & operator[](const String & name) const; + StoragePolicyPtr get(const String & name) const; /// All policies const std::map & getPoliciesMap() const { return policies; } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d8393c94f7b..8ba875249b3 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -151,9 +151,9 @@ struct ContextShared /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; /// Storage disk chooser for MergeTree engines - mutable std::unique_ptr merge_tree_disk_selector; + mutable std::shared_ptr merge_tree_disk_selector; /// Storage policy chooser for MergeTree engines - mutable std::unique_ptr merge_tree_storage_policy_selector; + mutable std::shared_ptr merge_tree_storage_policy_selector; std::optional merge_tree_settings; /// Settings of MergeTree* engines. 
std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) @@ -577,7 +577,7 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic } else { - StoragePolicyPtr tmp_policy = getStoragePolicySelector()[policy_name]; + StoragePolicyPtr tmp_policy = getStoragePolicySelector()->get(policy_name); if (tmp_policy->getVolumes().size() != 1) throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG); shared->tmp_volume = tmp_policy->getVolume(0); @@ -1892,17 +1892,17 @@ CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double par } -const DiskPtr & Context::getDisk(const String & name) const +DiskPtr Context::getDisk(const String & name) const { auto lock = getLock(); - const auto & disk_selector = getDiskSelector(); + auto disk_selector = getDiskSelector(); - return disk_selector[name]; + return disk_selector->get(name); } -DiskSelector & Context::getDiskSelector() const +DiskSelectorPtr Context::getDiskSelector() const { auto lock = getLock(); @@ -1911,23 +1911,23 @@ DiskSelector & Context::getDiskSelector() const constexpr auto config_name = "storage_configuration.disks"; auto & config = getConfigRef(); - shared->merge_tree_disk_selector = std::make_unique(config, config_name, *this); + shared->merge_tree_disk_selector = std::make_shared(config, config_name, *this); } - return *shared->merge_tree_disk_selector; + return shared->merge_tree_disk_selector; } -const StoragePolicyPtr & Context::getStoragePolicy(const String & name) const +StoragePolicyPtr Context::getStoragePolicy(const String & name) const { auto lock = getLock(); - auto & policy_selector = getStoragePolicySelector(); + auto policy_selector = getStoragePolicySelector(); - return policy_selector[name]; + return policy_selector->get(name); } -StoragePolicySelector & Context::getStoragePolicySelector() const 
+StoragePolicySelectorPtr Context::getStoragePolicySelector() const { auto lock = getLock(); @@ -1936,9 +1936,30 @@ StoragePolicySelector & Context::getStoragePolicySelector() const constexpr auto config_name = "storage_configuration.policies"; auto & config = getConfigRef(); - shared->merge_tree_storage_policy_selector = std::make_unique(config, config_name, getDiskSelector()); + shared->merge_tree_storage_policy_selector = std::make_shared(config, config_name, getDiskSelector()); + } + return shared->merge_tree_storage_policy_selector; +} + + +void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + + if (shared->merge_tree_disk_selector) + shared->merge_tree_disk_selector = shared->merge_tree_disk_selector->updateFromConfig(config, "storage_configuration.disks", *this); + + if (shared->merge_tree_storage_policy_selector) + { + try + { + shared->merge_tree_storage_policy_selector = shared->merge_tree_storage_policy_selector->updateFromConfig(config, "storage_configuration.policies", shared->merge_tree_disk_selector); + } + catch (Exception & e) + { + LOG_ERROR(shared->log, "An error has occured while reloading storage policies, storage policies were not applied: " << e.message()); + } } - return *shared->merge_tree_storage_policy_selector; } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 923b8aa8d54..c920f363585 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -93,9 +93,11 @@ struct StorageID; class IDisk; using DiskPtr = std::shared_ptr; class DiskSelector; +using DiskSelectorPtr = std::shared_ptr; class StoragePolicy; using StoragePolicyPtr = std::shared_ptr; class StoragePolicySelector; +using StoragePolicySelectorPtr = std::shared_ptr; class IOutputFormat; using OutputFormatPtr = std::shared_ptr; @@ -545,16 +547,18 @@ public: /// Lets you select the compression codec according to the conditions described in the 
configuration file. std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; - DiskSelector & getDiskSelector() const; + DiskSelectorPtr getDiskSelector() const; /// Provides storage disks - const DiskPtr & getDisk(const String & name) const; - const DiskPtr & getDefaultDisk() const { return getDisk("default"); } + DiskPtr getDisk(const String & name) const; + DiskPtr getDefaultDisk() const { return getDisk("default"); } - StoragePolicySelector & getStoragePolicySelector() const; + StoragePolicySelectorPtr getStoragePolicySelector() const; + + void updateStorageConfiguration(const Poco::Util::AbstractConfiguration & config); /// Provides storage politics schemes - const StoragePolicyPtr & getStoragePolicy(const String &name) const; + StoragePolicyPtr getStoragePolicy(const String & name) const; /// Get the server uptime in seconds. time_t getUptimeSeconds() const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 124aca5c168..d44501adab6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -147,7 +147,6 @@ MergeTreeData::MergeTreeData( , log_name(table_id_.getNameForLogs()) , log(&Logger::get(log_name)) , storage_settings(std::move(storage_settings_)) - , storage_policy(context_.getStoragePolicy(getSettings()->storage_policy)) , data_parts_by_info(data_parts_indexes.get()) , data_parts_by_state_and_info(data_parts_indexes.get()) , parts_mover(this) @@ -223,7 +222,7 @@ MergeTreeData::MergeTreeData( /// If not choose any if (version_file_path.empty()) - version_file_path = getFullPathOnDisk(storage_policy->getAnyDisk()) + "format_version.txt"; + version_file_path = getFullPathOnDisk(getStoragePolicy()->getAnyDisk()) + "format_version.txt"; bool version_file_exists = Poco::File(version_file_path).exists(); @@ -283,6 +282,11 @@ StorageInMemoryMetadata MergeTreeData::getInMemoryMetadata() const return metadata; } 
+StoragePolicyPtr MergeTreeData::getStoragePolicy() const +{ + return global_context.getStoragePolicy(getSettings()->storage_policy); +} + static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name) { for (const ExpressionAction & action : expr.getActions()) @@ -712,54 +716,10 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new } -void MergeTreeData::setStoragePolicy(const String & new_storage_policy_name, bool only_check) +void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) { const auto old_storage_policy = getStoragePolicy(); - const auto & new_storage_policy = global_context.getStoragePolicySelector()[new_storage_policy_name]; - - std::unordered_set new_volume_names; - for (const auto & volume : new_storage_policy->getVolumes()) - new_volume_names.insert(volume->getName()); - - for (const auto & volume : old_storage_policy->getVolumes()) - { - if (new_volume_names.count(volume->getName()) == 0) - throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::LOGICAL_ERROR); - - std::unordered_set new_disk_names; - for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks) - new_disk_names.insert(disk->getName()); - - for (const auto & disk : volume->disks) - if (new_disk_names.count(disk->getName()) == 0) - throw Exception("New storage policy shall contain disks of old one", ErrorCodes::LOGICAL_ERROR); - } - - std::unordered_set all_diff_disk_names; - for (const auto & disk : new_storage_policy->getDisks()) - all_diff_disk_names.insert(disk->getName()); - for (const auto & disk : old_storage_policy->getDisks()) - all_diff_disk_names.erase(disk->getName()); - - for (const String & disk_name : all_diff_disk_names) - { - const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); - if (Poco::File(path).exists()) - throw Exception("New storage policy contain disks which 
already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); - } - - if (!only_check) - { - for (const String & disk_name : all_diff_disk_names) - { - const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); - Poco::File(path).createDirectories(); - Poco::File(path + "detached").createDirectory(); - } - - storage_policy = new_storage_policy; - /// TODO: Query lock is fine but what about background moves??? And downloading of parts? - } + old_storage_policy->checkCompatibleWith(new_storage_policy); } @@ -902,7 +862,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) Strings part_file_names; Poco::DirectoryIterator end; - auto disks = storage_policy->getDisks(); + auto disks = getStoragePolicy()->getDisks(); /// Only check if user did touch storage configuration for this table. if (!getStoragePolicy()->isDefaultPolicy() && !skip_sanity_checks) @@ -912,7 +872,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (const auto & disk_ptr : disks) defined_disk_names.insert(disk_ptr->getName()); - for (auto & [disk_name, disk_ptr] : global_context.getDiskSelector().getDisksMap()) + for (auto & [disk_name, disk_ptr] : global_context.getDiskSelector()->getDisksMap()) { if (defined_disk_names.count(disk_name) == 0 && Poco::File(getFullPathOnDisk(disk_ptr)).exists()) { @@ -1355,7 +1315,7 @@ void MergeTreeData::rename( const String & new_table_path, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) { - auto disks = storage_policy->getDisks(); + auto disks = getStoragePolicy()->getDisks(); for (const auto & disk : disks) { @@ -1580,7 +1540,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S } if (changed_setting.name == "storage_policy") - setStoragePolicy(changed_setting.value.safeGet(), /* only_check = */ true); + checkStoragePolicy(global_context.getStoragePolicy(changed_setting.value.safeGet())); } } @@ -1897,14 +1857,41 
@@ void MergeTreeData::changeSettings( if (new_settings) { const auto & new_changes = new_settings->as().changes; + + for (const auto & change : new_changes) + if (change.name == "storage_policy") + { + StoragePolicyPtr new_storage_policy = global_context.getStoragePolicy(change.value.safeGet()); + StoragePolicyPtr old_storage_policy = getStoragePolicy(); + + checkStoragePolicy(new_storage_policy); + + std::unordered_set all_diff_disk_names; + for (const auto & disk : new_storage_policy->getDisks()) + all_diff_disk_names.insert(disk->getName()); + for (const auto & disk : old_storage_policy->getDisks()) + all_diff_disk_names.erase(disk->getName()); + + for (const String & disk_name : all_diff_disk_names) + { + const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); + if (Poco::File(path).exists()) + throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); + } + + for (const String & disk_name : all_diff_disk_names) + { + const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name)); + Poco::File(path).createDirectories(); + Poco::File(path + "detached").createDirectory(); + } + /// FIXME how would that be done while reloading configuration??? 
+ } + MergeTreeSettings copy = *getSettings(); copy.applyChanges(new_changes); storage_settings.set(std::make_unique(copy)); settings_ast = new_settings; - - for (const auto & change : new_changes) - if (change.name == "storage_policy") - setStoragePolicy(change.value.safeGet()); } } @@ -2916,9 +2903,9 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & else parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); - auto disk = storage_policy->getDiskByName(name); + auto disk = getStoragePolicy()->getDiskByName(name); if (!disk) - throw Exception("Disk " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception("Disk " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { @@ -2964,9 +2951,9 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String else parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); - auto volume = storage_policy->getVolumeByName(name); + auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception("Volume " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); if (parts.empty()) throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART); @@ -3208,7 +3195,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir); ActiveDataPartSet active_parts(format_version); - const auto disks = storage_policy->getDisks(); + const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) { const auto full_path = 
getFullPathOnDisk(disk); @@ -3282,7 +3269,7 @@ ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - auto reservation = storage_policy->reserve(expected_size); + auto reservation = getStoragePolicy()->reserve(expected_size); return checkAndReturnReservation(expected_size, std::move(reservation)); } @@ -3327,7 +3314,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ auto ttl_entry = selectTTLEntryForTTLInfos(ttl_infos, time_of_move); if (ttl_entry) { - SpacePtr destination_ptr = ttl_entry->getDestination(storage_policy); + SpacePtr destination_ptr = ttl_entry->getDestination(getStoragePolicy()); if (!destination_ptr) { if (ttl_entry->destination_type == PartDestinationType::VOLUME) @@ -3356,12 +3343,12 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ } } - reservation = storage_policy->reserve(expected_size, min_volume_index); + reservation = getStoragePolicy()->reserve(expected_size, min_volume_index); return reservation; } -SpacePtr MergeTreeData::TTLEntry::getDestination(const StoragePolicyPtr & policy) const +SpacePtr MergeTreeData::TTLEntry::getDestination(StoragePolicyPtr policy) const { if (destination_type == PartDestinationType::VOLUME) return policy->getVolumeByName(destination_name); @@ -3371,7 +3358,7 @@ SpacePtr MergeTreeData::TTLEntry::getDestination(const StoragePolicyPtr & policy return {}; } -bool MergeTreeData::TTLEntry::isPartInDestination(const StoragePolicyPtr & policy, const IMergeTreeDataPart & part) const +bool MergeTreeData::TTLEntry::isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const { if (destination_type == PartDestinationType::VOLUME) { @@ -3635,7 +3622,7 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & relative_path) const { - const auto disks 
= storage_policy->getDisks(); + const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) { const auto disk_path = getFullPathOnDisk(disk); @@ -3658,7 +3645,7 @@ String MergeTreeData::getFullPathForPart(const String & part_name, const String Strings MergeTreeData::getDataPaths() const { Strings res; - auto disks = storage_policy->getDisks(); + auto disks = getStoragePolicy()->getDisks(); for (const auto & disk : disks) res.push_back(getFullPathOnDisk(disk)); return res; @@ -3667,7 +3654,7 @@ Strings MergeTreeData::getDataPaths() const MergeTreeData::PathsWithDisks MergeTreeData::getDataPathsWithDisks() const { PathsWithDisks res; - auto disks = storage_policy->getDisks(); + auto disks = getStoragePolicy()->getDisks(); for (const auto & disk : disks) res.emplace_back(getFullPathOnDisk(disk), disk); return res; @@ -3818,7 +3805,7 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { - auto policy = storage_policy; + auto policy = getStoragePolicy(); if (policy->getVolumes().size() > 1) return true; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index aad681aae48..144729caa53 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -367,7 +367,7 @@ public: ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const override; - StoragePolicyPtr getStoragePolicy() const override { return storage_policy; } + StoragePolicyPtr getStoragePolicy() const override; bool supportsPrewhere() const override { return true; } bool supportsSampling() const override { return sample_by_ast != nullptr; } @@ -702,7 +702,7 @@ public: size_t min_volume_index = 0) const; /// Choose disk with max available free space /// Reserves 0 bytes - ReservationPtr makeEmptyReservationOnLargestDisk() { return storage_policy->makeEmptyReservationOnLargestDisk(); } + ReservationPtr 
makeEmptyReservationOnLargestDisk() { return getStoragePolicy()->makeEmptyReservationOnLargestDisk(); } MergeTreeDataFormatVersion format_version; @@ -751,10 +751,10 @@ public: ASTPtr entry_ast; /// Returns destination disk or volume for this rule. - SpacePtr getDestination(const StoragePolicyPtr & policy) const; + SpacePtr getDestination(StoragePolicyPtr policy) const; /// Checks if given part already belongs destination disk or volume for this rule. - bool isPartInDestination(const StoragePolicyPtr & policy, const IMergeTreeDataPart & part) const; + bool isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const; bool isEmpty() const { return expression == nullptr; } }; @@ -827,8 +827,6 @@ protected: /// Use get and set to receive readonly versions. MultiVersion storage_settings; - StoragePolicyPtr storage_policy; - /// Work with data parts struct TagByInfo{}; @@ -931,6 +929,8 @@ protected: const IndicesASTs & old_indices, const IndicesASTs & new_indices) const; + void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy); + void setStoragePolicy(const String & new_storage_policy_name, bool only_check = false); /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 40c34960c60..509194579e8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -179,7 +179,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz data_settings->max_bytes_to_merge_at_max_space_in_pool, static_cast(free_entries) / data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge); - return std::min(max_size, static_cast(data.storage_policy->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_SELECT)); + return std::min(max_size, static_cast(data.getStoragePolicy()->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_SELECT)); } @@ -188,8 +188,8 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() const auto data_settings = data.getSettings(); size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed); - /// DataPart can be store only at one disk. Get Max of free space at all disks - UInt64 disk_space = data.storage_policy->getMaxUnreservedFreeSpace(); + /// DataPart can be store only at one disk. Get maximum reservable free space at all disks. 
+ UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); /// Allow mutations only if there are enough threads, leave free threads for merges else if (background_pool_size - busy_threads_in_pool >= data_settings->number_of_free_entries_in_pool_to_execute_mutation) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index ef19a24686e..e521c127a5a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -98,7 +98,7 @@ bool MergeTreePartsMover::selectPartsForMove( return false; std::unordered_map need_to_move; - const auto & policy = data->getStoragePolicy(); + const auto policy = data->getStoragePolicy(); const auto & volumes = policy->getVolumes(); if (volumes.size() > 0) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 16135c04c48..8ed5c121011 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -286,7 +286,7 @@ void StorageDistributed::createStorage() } else { - auto policy = global_context.getStoragePolicySelector()[storage_policy]; + auto policy = global_context.getStoragePolicySelector()->get(storage_policy); if (policy->getVolumes().size() != 1) throw Exception("Policy for Distributed table, should have exactly one volume", ErrorCodes::BAD_ARGUMENTS); volume = policy->getVolume(0); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 06cb4b4be0d..fdd4e3c6f00 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -416,7 +416,7 @@ void StorageMergeTree::mutate(const MutationCommands & commands, const Context & { /// Choose any disk, because when we load mutations we search them at each disk /// where storage can be placed. See loadMutations(). 
- auto disk = storage_policy->getAnyDisk(); + auto disk = getStoragePolicy()->getAnyDisk(); MergeTreeMutationEntry entry(commands, getFullPathOnDisk(disk), insert_increment.get()); String file_name; Int64 version; @@ -618,7 +618,7 @@ bool StorageMergeTree::merge( } else { - UInt64 disk_space = storage_policy->getMaxUnreservedFreeSpace(); + UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); selected = merger_mutator.selectAllPartsToMergeWithinPartition(future_part, disk_space, can_merge, partition_id, final, out_disable_reason); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index bc6bcf28124..0ed2527a981 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3105,7 +3105,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p for (const DataPartPtr & part : data_parts) partition_ids.emplace(part->info.partition_id); - UInt64 disk_space = storage_policy->getMaxUnreservedFreeSpace(); + UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); for (const String & partition_id : partition_ids) { @@ -3133,7 +3133,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p else { - UInt64 disk_space = storage_policy->getMaxUnreservedFreeSpace(); + UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); String partition_id = getPartitionIDFromQuery(partition, query_context); selected = merger_mutator.selectAllPartsToMergeWithinPartition( future_merged_part, disk_space, can_merge, partition_id, final, &disable_reason); diff --git a/dbms/src/Storages/System/StorageSystemDisks.cpp b/dbms/src/Storages/System/StorageSystemDisks.cpp index 8859882c399..5ddf7de9ec8 100644 --- a/dbms/src/Storages/System/StorageSystemDisks.cpp +++ b/dbms/src/Storages/System/StorageSystemDisks.cpp @@ -41,7 +41,7 @@ Pipes StorageSystemDisks::read( const auto & 
disk_selector = context.getDiskSelector(); - for (const auto & [disk_name, disk_ptr] : disk_selector.getDisksMap()) + for (const auto & [disk_name, disk_ptr] : disk_selector->getDisksMap()) { col_name->insert(disk_name); col_path->insert(disk_ptr->getPath()); diff --git a/dbms/src/Storages/System/StorageSystemStoragePolicies.cpp b/dbms/src/Storages/System/StorageSystemStoragePolicies.cpp index e394d24c8a3..69962d42d18 100644 --- a/dbms/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/dbms/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -46,7 +46,7 @@ Pipes StorageSystemStoragePolicies::read( const auto & policy_selector = context.getStoragePolicySelector(); - for (const auto & [policy_name, policy_ptr] : policy_selector.getPoliciesMap()) + for (const auto & [policy_name, policy_ptr] : policy_selector->getPoliciesMap()) { const auto & volumes = policy_ptr->getVolumes(); for (size_t i = 0; i != volumes.size(); ++i) From 2ee082193154c558115cf18a0a6ea22a9a2a3d23 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 25 Feb 2020 05:59:02 +0300 Subject: [PATCH 120/215] Added tests for reloading storage configuration. 
--- dbms/tests/integration/helpers/cluster.py | 12 +- .../__init__.py | 0 .../configs/config.d/cluster.xml | 16 + .../config.d/storage_configuration.xml | 113 +++++ .../configs/logs_config.xml | 17 + .../test.py | 472 ++++++++++++++++++ 6 files changed, 624 insertions(+), 6 deletions(-) create mode 100644 dbms/tests/integration/test_reloading_storage_configuration/__init__.py create mode 100644 dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/cluster.xml create mode 100644 dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/storage_configuration.xml create mode 100644 dbms/tests/integration/test_reloading_storage_configuration/configs/logs_config.xml create mode 100644 dbms/tests/integration/test_reloading_storage_configuration/test.py diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index bc736ee9990..3537475ce96 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -880,19 +880,19 @@ class ClickHouseInstance: # used by all utils with any config conf_d_dir = p.abspath(p.join(configs_dir, 'conf.d')) # used by server with main config.xml - config_d_dir = p.abspath(p.join(configs_dir, 'config.d')) + self.config_d_dir = p.abspath(p.join(configs_dir, 'config.d')) users_d_dir = p.abspath(p.join(configs_dir, 'users.d')) os.mkdir(conf_d_dir) - os.mkdir(config_d_dir) + os.mkdir(self.config_d_dir) os.mkdir(users_d_dir) # The file is named with 0_ prefix to be processed before other configuration overloads. 
- shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), config_d_dir) + shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir) # Generate and write macros file macros = self.macros.copy() macros['instance'] = self.name - with open(p.join(config_d_dir, 'macros.xml'), 'w') as macros_config: + with open(p.join(self.config_d_dir, 'macros.xml'), 'w') as macros_config: macros_config.write(self.dict_to_xml({"macros": macros})) # Put ZooKeeper config @@ -905,7 +905,7 @@ class ClickHouseInstance: # Copy config.d configs for path in self.custom_main_config_paths: - shutil.copy(path, config_d_dir) + shutil.copy(path, self.config_d_dir) # Copy users.d configs for path in self.custom_user_config_paths: @@ -976,7 +976,7 @@ class ClickHouseInstance: binary_volume=binary_volume, odbc_bridge_volume=odbc_bridge_volume, configs_dir=configs_dir, - config_d_dir=config_d_dir, + config_d_dir=self.config_d_dir, db_dir=db_dir, tmpfs=str(self.tmpfs), logs_dir=logs_dir, diff --git a/dbms/tests/integration/test_reloading_storage_configuration/__init__.py b/dbms/tests/integration/test_reloading_storage_configuration/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/cluster.xml b/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/cluster.xml new file mode 100644 index 00000000000..ec7c9b8e4f8 --- /dev/null +++ b/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/cluster.xml @@ -0,0 +1,16 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + \ No newline at end of file diff --git a/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/storage_configuration.xml b/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..9abbdd26650 --- /dev/null +++ 
b/dbms/tests/integration/test_reloading_storage_configuration/configs/config.d/storage_configuration.xml @@ -0,0 +1,113 @@ + + + + + + 1024 + + + /jbod1/ + + + /jbod2/ + 10485760 + + + + /external/ + + + + + + +
+ jbod1 +
+ + external + +
+
+ + + + + jbod1 + + + external + + + + + + + +
+ jbod1 + jbod2 + 10485760 + +
+ + external + +
+
+ + + + +
+ jbod1 +
+ + external + +
+ 0.7 +
+ + + + + + default + 2097152 + + + + external + 20971520 + + + + + + + + + + default + 0 + + + external + + + jbod1 + 1024 + + + jbod2 + 1024000000 + + + + +
+ +
+ +
diff --git a/dbms/tests/integration/test_reloading_storage_configuration/configs/logs_config.xml b/dbms/tests/integration/test_reloading_storage_configuration/configs/logs_config.xml new file mode 100644 index 00000000000..bdf1bbc11c1 --- /dev/null +++ b/dbms/tests/integration/test_reloading_storage_configuration/configs/logs_config.xml @@ -0,0 +1,17 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + + system + part_log
+ 500 +
+
diff --git a/dbms/tests/integration/test_reloading_storage_configuration/test.py b/dbms/tests/integration/test_reloading_storage_configuration/test.py new file mode 100644 index 00000000000..c9effcdd67a --- /dev/null +++ b/dbms/tests/integration/test_reloading_storage_configuration/test.py @@ -0,0 +1,472 @@ +import collections +import os +import re +import shutil +import time +import xml.etree.ElementTree as ET + +import pytest + +import helpers.client +import helpers.cluster + + +cluster = helpers.cluster.ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', + config_dir='configs', + main_configs=['configs/logs_config.xml'], + with_zookeeper=True, + stay_alive=True, + tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/jbod3:size=40M', '/jbod4:size=40M', '/external:size=200M'], + macros={"shard": 0, "replica": 1} ) + +node2 = cluster.add_instance('node2', + config_dir='configs', + main_configs=['configs/logs_config.xml'], + with_zookeeper=True, + stay_alive=True, + tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/jbod3:size=40M', '/jbod4:size=40M', '/external:size=200M'], + macros={"shard": 0, "replica": 2} ) + + +def get_log(node): + return node.exec_in_container(["bash", "-c", "cat /var/log/clickhouse-server/clickhouse-server.log"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def start_over(): + shutil.copy(os.path.join(os.path.dirname(__file__), "configs/config.d/storage_configuration.xml"), os.path.join(node1.config_d_dir, "storage_configuration.xml")) + + for node in (node1, node2): + separate_configuration_path = os.path.join(node.config_d_dir, "separate_configuration.xml") + try: + os.remove(separate_configuration_path) + except: + """""" + + +def add_disk(node, name, path, separate_file=False): + separate_configuration_path = os.path.join(node.config_d_dir, "separate_configuration.xml") + + try: + if separate_file: + tree = 
ET.parse(separate_configuration_path) + else: + tree = ET.parse(os.path.join(node.config_d_dir, "storage_configuration.xml")) + except: + tree = ET.ElementTree(ET.fromstring('')) + root = tree.getroot() + new_disk = ET.Element(name) + new_path = ET.Element("path") + new_path.text = path + new_disk.append(new_path) + root.find("storage_configuration").find("disks").append(new_disk) + if separate_file: + tree.write(separate_configuration_path) + else: + tree.write(os.path.join(node.config_d_dir, "storage_configuration.xml")) + + +def add_policy(node, name, volumes): + tree = ET.parse(os.path.join(node.config_d_dir, "storage_configuration.xml")) + root = tree.getroot() + new_policy = ET.Element(name) + new_volumes = ET.Element("volumes") + for volume, disks in volumes.items(): + new_volume = ET.Element(volume) + for disk in disks: + new_disk = ET.Element("disk") + new_disk.text = disk + new_volume.append(new_disk) + new_volumes.append(new_volume) + new_policy.append(new_volumes) + root.find("storage_configuration").find("policies").append(new_policy) + tree.write(os.path.join(node.config_d_dir, "storage_configuration.xml")) + + +def test_add_disk(started_cluster): + try: + name = "test_add_disk" + engine = "MergeTree()" + + start_over() + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + assert "jbod3" not in set(node1.query("SELECT name FROM system.disks").splitlines()) + + add_disk(node1, "jbod3", "/jbod3/") + node1.query("SYSTEM RELOAD CONFIG") + + assert "jbod3" in set(node1.query("SELECT name FROM system.disks").splitlines()) + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_add_disk_to_separate_config(started_cluster): + try: + name = "test_add_disk" + engine = "MergeTree()" + + start_over() + 
node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + assert "jbod3" not in set(node1.query("SELECT name FROM system.disks").splitlines()) + + add_disk(node1, "jbod3", "/jbod3/", separate_file=True) + node1.query("SYSTEM RELOAD CONFIG") + + assert "jbod3" in set(node1.query("SELECT name FROM system.disks").splitlines()) + start_over() + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_add_policy(started_cluster): + try: + name = "test_add_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + add_policy(node1, "cool_policy", {"volume1": ["jbod3", "jbod4"]}) + node1.query("SYSTEM RELOAD CONFIG") + + disks = set(node1.query("SELECT name FROM system.disks").splitlines()) + assert "cool_policy" in set(node1.query("SELECT policy_name FROM system.storage_policies").splitlines()) + assert {"volume1"} == set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + assert {"['jbod3','jbod4']"} == set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_new_policy_works(started_cluster): + try: + name = "test_new_policy_works" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = 
{engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + add_policy(node1, "cool_policy", {"volume1": ["jbod3"]}) + node1.query("SYSTEM RELOAD CONFIG") + + # Incompatible storage policy. + with pytest.raises(helpers.client.QueryRuntimeException): + node1.query(""" + ALTER TABLE {name} MODIFY SETTING storage_policy='cool_policy' + """.format(name=name)) + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", collections.OrderedDict([("volume1", ["jbod3"]), ("main", ["jbod1", "jbod2"]), ("external", ["external"])])) + node1.query("SYSTEM RELOAD CONFIG") + + node1.query(""" + ALTER TABLE {name} MODIFY SETTING storage_policy='cool_policy' + """.format(name=name)) + + node1.query(""" + INSERT INTO TABLE {name} VALUES (1) + """.format(name=name)) + assert {"jbod3"} == set(node1.query("SELECT disk_name FROM system.parts WHERE active = 1 AND table = '{name}'".format(name=name)).splitlines()) + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_add_volume_to_policy(started_cluster): + try: + name = "test_add_volume_to_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", {"volume1": ["jbod3"]}) + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", collections.OrderedDict([("volume1", ["jbod3"]), ("volume2", ["jbod4"])])) + node1.query("SYSTEM RELOAD CONFIG") + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + 
disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + assert {"volume1", "volume2"} == volumes + assert {"['jbod3']", "['jbod4']"} == disks_sets + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_add_disk_to_policy(started_cluster): + try: + name = "test_add_disk_to_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", {"volume1": ["jbod3"]}) + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", {"volume1": ["jbod3","jbod4"]}) + node1.query("SYSTEM RELOAD CONFIG") + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'cool_policy'").splitlines()) + assert {"volume1"} == volumes + assert {"['jbod3','jbod4']"} == disks_sets + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_remove_disk(started_cluster): + try: + name = "test_remove_disk" + engine = "MergeTree()" + + start_over() + add_disk(node1, "remove_disk_jbod3", "/jbod3/") + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + assert "remove_disk_jbod3" in set(node1.query("SELECT name FROM system.disks").splitlines()) + + start_over() + node1.query("SYSTEM RELOAD CONFIG") + + assert 
"remove_disk_jbod3" in set(node1.query("SELECT name FROM system.disks").splitlines()) + assert re.search("Warning.*remove_disk_jbod3", get_log(node1)) + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_remove_policy(started_cluster): + try: + name = "test_remove_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "remove_policy_cool_policy", {"volume1": ["jbod3", "jbod4"]}) + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + assert "remove_policy_cool_policy" in set(node1.query("SELECT policy_name FROM system.storage_policies").splitlines()) + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + node1.query("SYSTEM RELOAD CONFIG") + + assert "remove_policy_cool_policy" in set(node1.query("SELECT policy_name FROM system.storage_policies").splitlines()) + assert re.search("Error.*remove_policy_cool_policy", get_log(node1)) + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_remove_volume_from_policy(started_cluster): + try: + name = "test_remove_volume_from_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "test_remove_volume_from_policy_cool_policy", collections.OrderedDict([("volume1", ["jbod3"]), ("volume2", ["jbod4"])])) + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 
'test_remove_volume_from_policy_cool_policy'").splitlines()) + disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'test_remove_volume_from_policy_cool_policy'").splitlines()) + assert {"volume1", "volume2"} == volumes + assert {"['jbod3']", "['jbod4']"} == disks_sets + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", {"volume1": ["jbod3"]}) + node1.query("SYSTEM RELOAD CONFIG") + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'test_remove_volume_from_policy_cool_policy'").splitlines()) + disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'test_remove_volume_from_policy_cool_policy'").splitlines()) + assert {"volume1", "volume2"} == volumes + assert {"['jbod3']", "['jbod4']"} == disks_sets + assert re.search("Error.*test_remove_volume_from_policy_cool_policy", get_log(node1)) + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" + + +def test_remove_disk_from_policy(started_cluster): + try: + name = "test_remove_disk_from_policy" + engine = "MergeTree()" + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "test_remove_disk_from_policy_cool_policy", {"volume1": ["jbod3","jbod4"]}) + node1.restart_clickhouse(kill=True) + time.sleep(2) + + node1.query(""" + CREATE TABLE {name} ( + d UInt64 + ) ENGINE = {engine} + ORDER BY d + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'test_remove_disk_from_policy_cool_policy'").splitlines()) + disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'test_remove_disk_from_policy_cool_policy'").splitlines()) + assert {"volume1"} == volumes + assert 
{"['jbod3','jbod4']"} == disks_sets + + start_over() + add_disk(node1, "jbod3", "/jbod3/") + add_disk(node1, "jbod4", "/jbod4/") + add_policy(node1, "cool_policy", {"volume1": ["jbod3"]}) + node1.query("SYSTEM RELOAD CONFIG") + + volumes = set(node1.query("SELECT volume_name FROM system.storage_policies WHERE policy_name = 'test_remove_disk_from_policy_cool_policy'").splitlines()) + disks_sets = set(node1.query("SELECT disks FROM system.storage_policies WHERE policy_name = 'test_remove_disk_from_policy_cool_policy'").splitlines()) + assert {"volume1"} == volumes + assert {"['jbod3','jbod4']"} == disks_sets + assert re.search("Error.*test_remove_disk_from_policy_cool_policy", get_log(node1)) + + finally: + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + """""" From 5f1d07a71489953e96a9c8519a5f56dfb4d88019 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 4 Mar 2020 16:31:53 +0100 Subject: [PATCH 121/215] increase timeout to make even slowest build green --- dbms/tests/integration/test_storage_kafka/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 0a7cfd47e4e..cf9f32e52b8 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -1142,7 +1142,8 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): pm.heal_all # connection restored and it will take a while until next block will be flushed - time.sleep(40) + # it takes years on CI :\ + time.sleep(90) # as it's a bit tricky to hit the proper moment - let's check in logs if we did it correctly assert instance.contains_in_log("ZooKeeper session has been expired.: while write prefix to view") From ecc65a21dbb8c53681e17db15c91f9d1b8eea60d Mon Sep 17 00:00:00 2001 From: Slach Date: Wed, 4 Mar 2020 20:44:34 +0500 Subject: [PATCH 122/215] add 
reference to http_server_default_response in HTTP interface description Signed-off-by: Slach --- docs/en/interfaces/http.md | 5 +++-- docs/en/operations/server_settings/settings.md | 2 +- docs/ru/interfaces/http.md | 4 +++- docs/ru/operations/server_settings/settings.md | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index aa545bfed97..8badede4665 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -3,8 +3,9 @@ The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. By default, clickhouse-server listens for HTTP on port 8123 (this can be changed in the config). -If you make a GET / request without parameters, it returns the string "Ok." (with a line feed at the end). You can use this in health-check scripts. - +If you make a GET / request without parameters, it returns 200 response code and the string "Ok." (with a line feed at the end). +[http_server_default_response](server_settings/settings.md#server_settings-http_server_default_response) settings can change default HTTP response. +Be careful when use this in health-check scripts. ```bash $ curl 'http://localhost:8123/' Ok. 
diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index eae135796a0..218f29aff8a 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -215,7 +215,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is ``` -## http_server_default_response +## http_server_default_response {#server_settings-http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index d1412bbd70e..38f3362cab7 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -3,7 +3,9 @@ HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов. В других отделах, HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым. По умолчанию, clickhouse-server слушает HTTP на порту 8123 (это можно изменить в конфиге). -Если запросить GET / без параметров, то вернётся строка "Ok." (с переводом строки на конце). Это может быть использовано в скриптах проверки доступности. +Если запросить GET / без параметров, то вернётся строка "Ok." (с переводом строки на конце). +Настройка [http_server_default_response](server_settings/settings.md#server_settings-http_server_default_response) может изменить тело HTTP ответа по умолчанию. +Будьте осторожны в скриптах проверки доступности. 
```bash $ curl 'http://localhost:8123/' diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index e72ae2c0054..0b49146fbdf 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -207,7 +207,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## http_server_default_response +## http_server_default_response {#server_settings-http_server_default_response} Страница, показываемая по умолчанию, при обращении к HTTP(s) серверу ClickHouse. From 1be66874f918e8e0c7441bb6db239a3d2e92623a Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 4 Mar 2020 17:38:12 +0100 Subject: [PATCH 123/215] Kafka: catch and log exceptions in desctructors. Fixes #9494 --- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 22 ++++++++++--- .../integration/test_storage_kafka/test.py | 32 +++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 10f5fd0b47a..63292711ebb 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -78,9 +78,16 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() { /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 - consumer->unsubscribe(); - consumer->unassign(); - while (consumer->get_consumer_queue().next_event(100ms)); + try { + if (!consumer->get_subscription().empty()) + consumer->unsubscribe(); + if (!assignment.empty()) { + consumer->unassign(); + } + while (consumer->get_consumer_queue().next_event(100ms)); + } catch (const cppkafka::HandleException & e) { + LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer destructor: " << e.what()); + } } void ReadBufferFromKafkaConsumer::commit() @@ -184,7 +191,14 @@ void 
ReadBufferFromKafkaConsumer::unsubscribe() current = messages.begin(); BufferBase::set(nullptr, 0, 0); - consumer->unsubscribe(); + // it should not raise exception as used in destructor + try { + if (!consumer->get_subscription().empty()) + consumer->unsubscribe(); + } catch (const cppkafka::HandleException &e) { + LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer::unsubscribe: " << e.what()); + } + } diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index cf9f32e52b8..3242b59bca3 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -1161,6 +1161,38 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): assert TSV(result) == TSV('22\t22\t22') +@pytest.mark.timeout(120) +def test_exception_from_destructor(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'xyz', + kafka_group_name = '', + kafka_format = 'JSONEachRow'; + ''') + instance.query_and_get_error(''' + SELECT * FROM test.kafka; + ''') + instance.query(''' + DROP TABLE test.kafka; + ''') + + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'xyz', + kafka_group_name = '', + kafka_format = 'JSONEachRow'; + ''') + instance.query(''' + DROP TABLE test.kafka; + ''') + + kafka_cluster.open_bash_shell('instance') + assert TSV(instance.query('SELECT 1')) == TSV('1') + if __name__ == '__main__': cluster.start() From 9705e3ffab16ba6909eef22c790b50243da7d4c0 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 4 Mar 2020 19:07:36 +0100 Subject: [PATCH 124/215] Style fixes --- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git 
a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 63292711ebb..7edb4c0ac4f 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -78,14 +78,16 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() { /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 - try { + try + { if (!consumer->get_subscription().empty()) consumer->unsubscribe(); - if (!assignment.empty()) { + if (!assignment.empty()) consumer->unassign(); - } while (consumer->get_consumer_queue().next_event(100ms)); - } catch (const cppkafka::HandleException & e) { + } + catch (const cppkafka::HandleException & e) + { LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer destructor: " << e.what()); } } @@ -192,10 +194,13 @@ void ReadBufferFromKafkaConsumer::unsubscribe() BufferBase::set(nullptr, 0, 0); // it should not raise exception as used in destructor - try { + try + { if (!consumer->get_subscription().empty()) consumer->unsubscribe(); - } catch (const cppkafka::HandleException &e) { + } + catch (const cppkafka::HandleException & e) + { LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer::unsubscribe: " << e.what()); } From 156e6246c12b889c43b4be0193d34330cc4b383c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 06:12:07 +0300 Subject: [PATCH 125/215] Better code around sessions --- dbms/src/Interpreters/Context.cpp | 328 ++++++++++++++---------------- dbms/src/Interpreters/Context.h | 13 +- 2 files changed, 160 insertions(+), 181 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 1c2d4456d96..0a0ec748c56 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -96,6 +96,160 @@ namespace ErrorCodes } +/// Named sessions. 
The user could specify session identifier to reuse settings and temporary tables in subsequent requests. +class Sessions +{ +public: + using Key = Context::SessionKey; + + ~Sessions() + { + try + { + { + std::lock_guard lock{mutex}; + quit = true; + } + + cond.notify_one(); + thread.join(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + /// Find existing session or create a new. + std::shared_ptr acquireSession(const Key & key, Context & context, std::chrono::steady_clock::duration timeout, bool throw_if_not_found) + { + std::unique_lock lock(mutex); + + auto it = sessions.find(key); + if (it == sessions.end()) + { + if (throw_if_not_found) + throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); + + /// Create a new session from current context. + auto new_session = std::make_shared(context); + scheduleCloseSession(key, *new_session, timeout, lock); + it = sessions.insert(std::make_pair(key, std::move(new_session))).first; + } + else if (it->second->client_info.current_user != context.client_info.current_user) + { + throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR); + } + + const auto & session = it->second; + + if (session->session_is_used) + throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); + + session->session_is_used = true; + return session; + } + + void releaseSession(const Key & key, Context & session, std::chrono::steady_clock::duration timeout) + { + std::unique_lock lock(mutex); + session.session_is_used = false; + scheduleCloseSession(key, session, timeout, lock); + } + +private: + class SessionKeyHash + { + public: + size_t operator()(const Key & key) const + { + SipHash hash; + hash.update(key.first); + hash.update(key.second); + return hash.get64(); + } + }; + + using Container = std::unordered_map, SessionKeyHash>; + using CloseTimes = std::deque>; + Container sessions; + CloseTimes close_times; + 
std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1); + std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); + UInt64 close_cycle = 0; + + void scheduleCloseSession(const Key & key, Context & session, std::chrono::steady_clock::duration timeout, std::unique_lock &) + { + const UInt64 close_index = timeout / close_interval + 1; + const auto new_close_cycle = close_cycle + close_index; + + if (session.session_close_cycle != new_close_cycle) + { + session.session_close_cycle = new_close_cycle; + if (close_times.size() < close_index + 1) + close_times.resize(close_index + 1); + close_times[close_index].emplace_back(key); + } + } + + void cleanThread() + { + setThreadName("SessionCleaner"); + + std::unique_lock lock{mutex}; + + while (true) + { + auto interval = closeSessions(lock); + + if (cond.wait_for(lock, interval, [this]() -> bool { return quit; })) + break; + } + } + + /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. + std::chrono::steady_clock::duration closeSessions(std::unique_lock & lock) + { + const auto now = std::chrono::steady_clock::now(); + + /// The time to close the next session did not come + if (now < close_cycle_time) + return close_cycle_time - now; /// Will sleep until it comes. 
+ + const auto current_cycle = close_cycle; + + ++close_cycle; + close_cycle_time = now + close_interval; + + if (close_times.empty()) + return close_interval; + + auto & sessions_to_close = close_times.front(); + + for (const auto & key : sessions_to_close) + { + const auto session = sessions.find(key); + + if (session != sessions.end() && session->second->session_close_cycle <= current_cycle) + { + if (session->second->session_is_used) + scheduleCloseSession(key, *session->second, std::chrono::seconds(0), lock); + else + sessions.erase(session); + } + } + + close_times.pop_front(); + return close_interval; + } + + std::mutex mutex; + std::condition_variable cond; + std::atomic quit{false}; + ThreadFromGlobalPool thread{&Sessions::cleanThread, this}; +}; + + /** Set of known objects (environment), that could be used in query. * Shared (global) part. Order of members (especially, order of destruction) is very important. */ @@ -166,27 +320,7 @@ struct ContextShared std::optional trace_collector; /// Thread collecting traces from threads executing queries - /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. - - class SessionKeyHash - { - public: - size_t operator()(const Context::SessionKey & key) const - { - SipHash hash; - hash.update(key.first); - hash.update(key.second); - return hash.get64(); - } - }; - - using Sessions = std::unordered_map, SessionKeyHash>; - using CloseTimes = std::deque>; - mutable Sessions sessions; - mutable CloseTimes close_times; - std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1); - std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); - UInt64 close_cycle = 0; + Sessions sessions; /// Controls named HTTP sessions. 
/// Clusters for distributed tables /// Initialized on demand (on distributed storages initialization) since Settings should be initialized @@ -371,100 +505,15 @@ Context::SessionKey Context::getSessionKey(const String & session_id) const } -void Context::scheduleCloseSession(const Context::SessionKey & key, std::chrono::steady_clock::duration timeout) +std::shared_ptr Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) { - const UInt64 close_index = timeout / shared->close_interval + 1; - const auto new_close_cycle = shared->close_cycle + close_index; - - if (session_close_cycle != new_close_cycle) - { - session_close_cycle = new_close_cycle; - if (shared->close_times.size() < close_index + 1) - shared->close_times.resize(close_index + 1); - shared->close_times[close_index].emplace_back(key); - } -} - - -std::shared_ptr Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) const -{ - auto lock = getLock(); - - const auto & key = getSessionKey(session_id); - auto it = shared->sessions.find(key); - - if (it == shared->sessions.end()) - { - if (session_check) - throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); - - auto new_session = std::make_shared(*this); - - new_session->scheduleCloseSession(key, timeout); - - it = shared->sessions.insert(std::make_pair(key, std::move(new_session))).first; - } - else if (it->second->client_info.current_user != client_info.current_user) - { - throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR); - } - - const auto & session = it->second; - - if (session->session_is_used) - throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); - session->session_is_used = true; - - session->client_info = client_info; - - return session; + return shared->sessions.acquireSession(getSessionKey(session_id), *this, timeout, session_check); } 
void Context::releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout) { - auto lock = getLock(); - - session_is_used = false; - scheduleCloseSession(getSessionKey(session_id), timeout); -} - - -std::chrono::steady_clock::duration Context::closeSessions() const -{ - auto lock = getLock(); - - const auto now = std::chrono::steady_clock::now(); - - if (now < shared->close_cycle_time) - return shared->close_cycle_time - now; - - const auto current_cycle = shared->close_cycle; - - ++shared->close_cycle; - shared->close_cycle_time = now + shared->close_interval; - - if (shared->close_times.empty()) - return shared->close_interval; - - auto & sessions_to_close = shared->close_times.front(); - - for (const auto & key : sessions_to_close) - { - const auto session = shared->sessions.find(key); - - if (session != shared->sessions.end() && session->second->session_close_cycle <= current_cycle) - { - if (session->second->session_is_used) - session->second->scheduleCloseSession(key, std::chrono::seconds(0)); - else - shared->sessions.erase(session); - } - } - - shared->close_times.pop_front(); - - return shared->close_interval; + shared->sessions.releaseSession(getSessionKey(session_id), *this, timeout); } @@ -2259,65 +2308,4 @@ void Context::resetInputCallbacks() input_blocks_reader = {}; } - -class SessionCleaner -{ -public: - SessionCleaner(Context & context_) - : context{context_} - { - } - ~SessionCleaner(); - -private: - void run(); - - Context & context; - - std::mutex mutex; - std::condition_variable cond; - std::atomic quit{false}; - ThreadFromGlobalPool thread{&SessionCleaner::run, this}; -}; - -SessionCleaner::~SessionCleaner() -{ - try - { - { - std::lock_guard lock{mutex}; - quit = true; - } - - cond.notify_one(); - - thread.join(); - } - catch (...) 
- { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - -void SessionCleaner::run() -{ - setThreadName("SessionCleaner"); - - std::unique_lock lock{mutex}; - - while (true) - { - auto interval = context.closeSessions(); - - if (cond.wait_for(lock, interval, [this]() -> bool { return quit; })) - break; - } -} - -void Context::createSessionCleaner() -{ - session_cleaner = std::make_unique(*this); -} - - } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index da7cb98310e..4395c1da4f5 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -96,11 +96,8 @@ class DiskSelector; class StoragePolicy; using StoragePolicyPtr = std::shared_ptr; class StoragePolicySelector; -class SessionCleaner; - class IOutputFormat; using OutputFormatPtr = std::shared_ptr; - class Volume; using VolumePtr = std::shared_ptr; @@ -179,7 +176,7 @@ private: Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context. Could be equal to this. - std::shared_ptr session_cleaner; /// It will launch a thread to clean old named HTTP sessions. See 'createSessionCleaner'. + friend class Sessions; UInt64 session_close_cycle = 0; bool session_is_used = false; @@ -423,12 +420,9 @@ public: const Databases getDatabases() const; Databases getDatabases(); - std::shared_ptr acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) const; + std::shared_ptr acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); void releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout); - /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. - std::chrono::steady_clock::duration closeSessions() const; - /// For methods below you may need to acquire a lock by yourself. 
std::unique_lock getLock() const; @@ -637,9 +631,6 @@ private: SessionKey getSessionKey(const String & session_id) const; - /// Session will be closed after specified timeout. - void scheduleCloseSession(const SessionKey & key, std::chrono::steady_clock::duration timeout); - void checkCanBeDropped(const String & database, const String & table, const size_t & size, const size_t & max_size_to_drop) const; }; From aac2f98870e7dee800cd6d84e7a980e57c920f0d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 06:57:31 +0300 Subject: [PATCH 126/215] Better code around sessions, step 2 --- dbms/programs/server/HTTPHandler.cpp | 11 +-- dbms/src/Interpreters/Context.cpp | 103 +++++++++++++++++---------- dbms/src/Interpreters/Context.h | 13 +--- 3 files changed, 70 insertions(+), 57 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index eb83fc6f5d7..69689a55e3d 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -273,7 +273,7 @@ void HTTPHandler::processQuery( /// The user could specify session identifier and session timeout. /// It allows to modify settings, create temporary tables and reuse them in subsequent requests. - std::shared_ptr session; + std::shared_ptr session; String session_id; std::chrono::steady_clock::duration session_timeout; bool session_is_set = params.has("session_id"); @@ -287,15 +287,10 @@ void HTTPHandler::processQuery( session = context.acquireSession(session_id, session_timeout, session_check == "1"); - context = *session; - context.setSessionContext(*session); + context = session->context; + context.setSessionContext(session->context); } - SCOPE_EXIT({ - if (session_is_set) - session->releaseSession(session_id, session_timeout); - }); - /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). 
String http_response_compression_methods = request.get("Accept-Encoding", ""); CompressionMethod http_response_compression_method = CompressionMethod::None; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0a0ec748c56..c5e0dd19b8c 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -96,11 +96,34 @@ namespace ErrorCodes } +class Sessions; + +/// User name and session identifier. Named sessions are local to users. +using SessionKey = std::pair; + /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. +struct Session +{ + SessionKey key; + UInt64 close_cycle = 0; + bool is_used = false; + Context context; + std::chrono::steady_clock::duration timeout; + Sessions & parent; + + Session(SessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, Sessions & parent_) + : key(key_), context(context_), timeout(timeout_), parent(parent_) + { + } + + ~Session(); +}; + + class Sessions { public: - using Key = Context::SessionKey; + using Key = SessionKey; ~Sessions() { @@ -121,10 +144,17 @@ public: } /// Find existing session or create a new. - std::shared_ptr acquireSession(const Key & key, Context & context, std::chrono::steady_clock::duration timeout, bool throw_if_not_found) + std::shared_ptr acquireSession(const String & session_id, Context & context, std::chrono::steady_clock::duration timeout, bool throw_if_not_found) { std::unique_lock lock(mutex); + auto & user_name = context.client_info.current_user; + + if (user_name.empty()) + throw Exception("Empty user name.", ErrorCodes::LOGICAL_ERROR); + + Key key(user_name, session_id); + auto it = sessions.find(key); if (it == sessions.end()) { @@ -132,29 +162,31 @@ public: throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); /// Create a new session from current context. 
- auto new_session = std::make_shared(context); - scheduleCloseSession(key, *new_session, timeout, lock); + auto new_session = std::make_shared(key, context, timeout, *this); + + scheduleCloseSession(*new_session, lock); it = sessions.insert(std::make_pair(key, std::move(new_session))).first; } - else if (it->second->client_info.current_user != context.client_info.current_user) + else if (it->second->key.first != context.client_info.current_user) { throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR); } + /// Use existing session. const auto & session = it->second; - if (session->session_is_used) + if (session->is_used) throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); - session->session_is_used = true; + session->is_used = true; return session; } - void releaseSession(const Key & key, Context & session, std::chrono::steady_clock::duration timeout) + void releaseSession(Session & session) { std::unique_lock lock(mutex); - session.session_is_used = false; - scheduleCloseSession(key, session, timeout, lock); + session.is_used = false; + scheduleCloseSession(session, lock); } private: @@ -170,7 +202,7 @@ private: } }; - using Container = std::unordered_map, SessionKeyHash>; + using Container = std::unordered_map, SessionKeyHash>; using CloseTimes = std::deque>; Container sessions; CloseTimes close_times; @@ -178,17 +210,20 @@ private: std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); UInt64 close_cycle = 0; - void scheduleCloseSession(const Key & key, Context & session, std::chrono::steady_clock::duration timeout, std::unique_lock &) + void scheduleCloseSession(Session & session, std::unique_lock &) { - const UInt64 close_index = timeout / close_interval + 1; + /// Push it on a queue of sessions to close, on a position corresponding to the timeout. 
+ /// (timeout is measured from current moment of time) + + const UInt64 close_index = session.timeout / close_interval + 1; const auto new_close_cycle = close_cycle + close_index; - if (session.session_close_cycle != new_close_cycle) + if (session.close_cycle != new_close_cycle) { - session.session_close_cycle = new_close_cycle; + session.close_cycle = new_close_cycle; if (close_times.size() < close_index + 1) close_times.resize(close_index + 1); - close_times[close_index].emplace_back(key); + close_times[close_index].emplace_back(session.key); } } @@ -230,10 +265,13 @@ private: { const auto session = sessions.find(key); - if (session != sessions.end() && session->second->session_close_cycle <= current_cycle) + if (session != sessions.end() && session->second->close_cycle <= current_cycle) { - if (session->second->session_is_used) - scheduleCloseSession(key, *session->second, std::chrono::seconds(0), lock); + if (session->second->is_used) + { + session->second->timeout = std::chrono::steady_clock::duration{0}; + scheduleCloseSession(*session->second, lock); + } else sessions.erase(session); } @@ -250,6 +288,12 @@ private: }; +Session::~Session() +{ + parent.releaseSession(*this); +} + + /** Set of known objects (environment), that could be used in query. * Shared (global) part. Order of members (especially, order of destruction) is very important. 
*/ @@ -494,26 +538,9 @@ Databases Context::getDatabases() } -Context::SessionKey Context::getSessionKey(const String & session_id) const +std::shared_ptr Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) { - auto & user_name = client_info.current_user; - - if (user_name.empty()) - throw Exception("Empty user name.", ErrorCodes::LOGICAL_ERROR); - - return SessionKey(user_name, session_id); -} - - -std::shared_ptr Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) -{ - return shared->sessions.acquireSession(getSessionKey(session_id), *this, timeout, session_check); -} - - -void Context::releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout) -{ - shared->sessions.releaseSession(getSessionKey(session_id), *this, timeout); + return shared->sessions.acquireSession(session_id, *this, timeout, session_check); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 4395c1da4f5..07d2414660b 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -100,11 +100,10 @@ class IOutputFormat; using OutputFormatPtr = std::shared_ptr; class Volume; using VolumePtr = std::shared_ptr; +struct Session; #if USE_EMBEDDED_COMPILER - class CompiledExpressionCache; - #endif /// Table -> set of table-views that make SELECT from it. @@ -177,8 +176,6 @@ private: Context * global_context = nullptr; /// Global context. Could be equal to this. 
friend class Sessions; - UInt64 session_close_cycle = 0; - bool session_is_used = false; using SampleBlockCache = std::unordered_map; mutable SampleBlockCache sample_block_cache; @@ -420,8 +417,7 @@ public: const Databases getDatabases() const; Databases getDatabases(); - std::shared_ptr acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); - void releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout); + std::shared_ptr acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); /// For methods below you may need to acquire a lock by yourself. std::unique_lock getLock() const; @@ -582,9 +578,6 @@ public: String getFormatSchemaPath() const; void setFormatSchemaPath(const String & path); - /// User name and session identifier. Named sessions are local to users. - using SessionKey = std::pair; - SampleBlockCache & getSampleBlockCache() const; /// Query parameters for prepared statements. 
@@ -629,8 +622,6 @@ private: StoragePtr getTableImpl(const StorageID & table_id, std::optional * exception) const; - SessionKey getSessionKey(const String & session_id) const; - void checkCanBeDropped(const String & database, const String & table, const size_t & size, const size_t & max_size_to_drop) const; }; From 4e15d744699f70d4ffbc554d78d79718e9dba182 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 07:10:48 +0300 Subject: [PATCH 127/215] Better code around sessions, step 3 --- dbms/programs/server/HTTPHandler.cpp | 5 +++ dbms/src/Interpreters/Context.cpp | 47 +++------------------------- dbms/src/Interpreters/Context.h | 25 +++++++++++++++ 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 69689a55e3d..7eafe00fdd6 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -291,6 +291,11 @@ void HTTPHandler::processQuery( context.setSessionContext(session->context); } + SCOPE_EXIT({ + if (session) + session->release(); + }); + /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). String http_response_compression_methods = request.get("Accept-Encoding", ""); CompressionMethod http_response_compression_method = CompressionMethod::None; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index c5e0dd19b8c..a7011a1d4ae 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -96,30 +96,6 @@ namespace ErrorCodes } -class Sessions; - -/// User name and session identifier. Named sessions are local to users. -using SessionKey = std::pair; - -/// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. 
-struct Session -{ - SessionKey key; - UInt64 close_cycle = 0; - bool is_used = false; - Context context; - std::chrono::steady_clock::duration timeout; - Sessions & parent; - - Session(SessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, Sessions & parent_) - : key(key_), context(context_), timeout(timeout_), parent(parent_) - { - } - - ~Session(); -}; - - class Sessions { public: @@ -162,10 +138,7 @@ public: throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); /// Create a new session from current context. - auto new_session = std::make_shared(key, context, timeout, *this); - - scheduleCloseSession(*new_session, lock); - it = sessions.insert(std::make_pair(key, std::move(new_session))).first; + it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; } else if (it->second->key.first != context.client_info.current_user) { @@ -175,17 +148,15 @@ public: /// Use existing session. const auto & session = it->second; - if (session->is_used) + if (!session.unique()) throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); - session->is_used = true; return session; } void releaseSession(Session & session) { std::unique_lock lock(mutex); - session.is_used = false; scheduleCloseSession(session, lock); } @@ -265,16 +236,8 @@ private: { const auto session = sessions.find(key); - if (session != sessions.end() && session->second->close_cycle <= current_cycle) - { - if (session->second->is_used) - { - session->second->timeout = std::chrono::steady_clock::duration{0}; - scheduleCloseSession(*session->second, lock); - } - else - sessions.erase(session); - } + if (session != sessions.end() && session->second.unique() && session->second->close_cycle <= current_cycle) + sessions.erase(session); } close_times.pop_front(); @@ -288,7 +251,7 @@ private: }; -Session::~Session() +void Session::release() { parent.releaseSession(*this); } diff --git 
a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 07d2414660b..de1d12e1a47 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -102,6 +102,7 @@ class Volume; using VolumePtr = std::shared_ptr; struct Session; + #if USE_EMBEDDED_COMPILER class CompiledExpressionCache; #endif @@ -133,6 +134,7 @@ struct IHostContext using IHostContextPtr = std::shared_ptr; + /** A set of known objects that can be used in the query. * Consists of a shared part (always common to all sessions and queries) * and copied part (which can be its own for each session or query). @@ -653,4 +655,27 @@ private: std::unique_lock table_lock; }; + +class Sessions; + +/// User name and session identifier. Named sessions are local to users. +using SessionKey = std::pair; + +/// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. +struct Session +{ + SessionKey key; + UInt64 close_cycle = 0; + Context context; + std::chrono::steady_clock::duration timeout; + Sessions & parent; + + Session(SessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, Sessions & parent_) + : key(key_), context(context_), timeout(timeout_), parent(parent_) + { + } + + void release(); +}; + } From 202af42d17775e8cc9a1ced749b45cb3cd33436e Mon Sep 17 00:00:00 2001 From: Slach Date: Thu, 5 Mar 2020 12:35:48 +0500 Subject: [PATCH 128/215] add description for /ping http handler Signed-off-by: Slach --- docs/en/interfaces/http.md | 12 +++++++++--- docs/en/operations/monitoring.md | 2 +- docs/en/operations/server_settings/settings.md | 1 + docs/ru/interfaces/http.md | 14 +++++++++----- docs/ru/operations/monitoring.md | 2 +- docs/ru/operations/server_settings/settings.md | 1 + docs/zh/operations/monitoring.md | 2 +- 7 files changed, 23 insertions(+), 11 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 8badede4665..0ce700bdc54 100644 --- 
a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -3,14 +3,20 @@ The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. By default, clickhouse-server listens for HTTP on port 8123 (this can be changed in the config). -If you make a GET / request without parameters, it returns 200 response code and the string "Ok." (with a line feed at the end). -[http_server_default_response](server_settings/settings.md#server_settings-http_server_default_response) settings can change default HTTP response. -Be careful when use this in health-check scripts. + +If you make a GET / request without parameters, it returns a 200 response code and the string defined in [http_server_default_response](../operations/server_settings/settings.md#server_settings-http_server_default_response), which defaults to "Ok." (with a line feed at the end). ```bash $ curl 'http://localhost:8123/' Ok. ``` +Use the GET /ping request in health-check scripts. This handler always returns "Ok." (with a line feed at the end). Available from version 18.12.13. +```bash +$ curl 'http://localhost:8123/ping' +Ok. +``` + + Send the request as a URL 'query' parameter, or as a POST. Or send the beginning of the query in the 'query' parameter, and the rest in the POST (we'll explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries. If successful, you receive the 200 response code and the result in the response body. 
diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index eef7d8c38bb..b9227cdfa1e 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -32,6 +32,6 @@ You can find metrics in the [system.metrics](system_tables.md#system_tables-metr You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). -Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/`. If the server is available, it responds with `200 OK`. +Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 218f29aff8a..1e48b374711 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -218,6 +218,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is ## http_server_default_response {#server_settings-http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. 
+Default value is "Ok." (with a line feed at the end) **Example** diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 38f3362cab7..f51689054e3 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -3,16 +3,20 @@ HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов. В других отделах, HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым. По умолчанию, clickhouse-server слушает HTTP на порту 8123 (это можно изменить в конфиге). -Если запросить GET / без параметров, то вернётся строка "Ok." (с переводом строки на конце). -Настройка [http_server_default_response](server_settings/settings.md#server_settings-http_server_default_response) может изменить тело HTTP ответа по умолчанию. -Будьте осторожны в скриптах проверки доступности. - +Если запросить GET / без параметров, то вернётся строка заданная с помощью настройки [http_server_default_response](../operations/server_settings/settings.md#server_settings-http_server_default_response). Значение по умолчанию "Ok." (с переводом строки на конце). ```bash $ curl 'http://localhost:8123/' Ok. ``` -Запрос отправляется в виде параметра URL query. Или POST-ом. Или начало запроса в параметре query, а продолжение POST-ом (зачем это нужно, будет объяснено ниже). Размер URL ограничен 16KB, это следует учитывать при отправке больших запросов. +В скриптах проверки доступности вы можете использовать GET /ping без параметров. Если сервер доступен всегда возвращается "Ok." (с переводом строки на конце). +```bash +$ curl 'http://localhost:8123/ping' +Ok. +``` + +Запрос отправляется в виде URL параметра с именем query. Или как тело запроса при использовании метода POST. +Или начало запроса в URL параметре query, а продолжение POST-ом (зачем это нужно, будет объяснено ниже). 
Размер URL ограничен 16KB, это следует учитывать при отправке больших запросов. В случае успеха, вам вернётся код ответа 200 и результат обработки запроса в теле ответа. В случае ошибки, вам вернётся код ответа 500 и текст с описанием ошибки в теле ответа. diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md index 7e7784b79b6..4467ef134d6 100644 --- a/docs/ru/operations/monitoring.md +++ b/docs/ru/operations/monitoring.md @@ -32,6 +32,6 @@ ClickHouse собирает: Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). Смотрите секцию [graphite](server_settings/settings.md#server_settings-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html). -Также, можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/`. Если сервер доступен, он отвечает `200 OK`. +Также, можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/ping`. Если сервер доступен, он отвечает `200 OK`. Для мониторинга серверов в кластерной конфигурации необходимо установить параметр [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) и использовать HTTP ресурс `/replicas_status`. Если реплика доступна и не отстаёт от других реплик, то запрос к `/replicas_status` возвращает `200 OK`. Если реплика отстаёт, то запрос возвращает `503 HTTP_SERVICE_UNAVAILABLE`, включая информацию о размере отставания. 
diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 0b49146fbdf..47ec5ef7d9b 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -210,6 +210,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## http_server_default_response {#server_settings-http_server_default_response} Страница, показываемая по умолчанию, при обращении к HTTP(s) серверу ClickHouse. +Значение по умолчанию "Ok." (с переводом строки на конце). **Пример** diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index cef3baf169b..bd177fe15b2 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -32,6 +32,6 @@ ClickHouse 收集的指标项: 可以配置ClickHouse 往 [Graphite](https://github.com/graphite-project)导入指标。 参考 [Graphite section](server_settings/settings.md#server_settings-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 -此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/`。 如果服务器可用,它将以 `200 OK` 响应。 +此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。 要监视服务器集群的配置中,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 From db87e6f6685857205aaedb7b9d04dbf52ace9421 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 5 Mar 2020 11:21:53 +0300 Subject: [PATCH 129/215] Fixed style. 
--- dbms/src/Disks/DiskSpaceMonitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Disks/DiskSpaceMonitor.cpp b/dbms/src/Disks/DiskSpaceMonitor.cpp index 9bbd7907d69..d555e8152b0 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.cpp +++ b/dbms/src/Disks/DiskSpaceMonitor.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int UNKNOWN_DISK; extern const int UNKNOWN_POLICY; From b1d63c51fe65fdd3cf9c88368d3cf7652c6931ed Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Thu, 5 Mar 2020 11:38:54 +0300 Subject: [PATCH 130/215] CLICKHOUSEDOCS-548: More adopter. --- docs/en/introduction/adopters.md | 95 +++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index edfc774f7b6..eafb9d6d0fb 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -10,6 +10,8 @@ | [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | | [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | | [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | | [CARTO](https://carto.com/) | Business Intelligence | Geo 
analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | | [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | | [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | @@ -18,20 +20,37 @@ | [ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | | [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019 ](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| [Criteo/Storetail] | Retail | Main product | — | — | [Slides in English, October 2018 ](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | | [Deutsche Bank](db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | | [Exness](https://www.exness.com) | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| [HUYA](https://www.huya.com/) | Video Streaming | Analytics | — | — | [Slides in 
Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | | [Idealista](www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in English, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| [LifeStreet](https://cloudflare.com) | Ad network | Main product | — | — | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| [LifeStreet](https://cloudflare.com) | Ad network | Main product | 60 servers in 3 replicas | 2-2.5 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | --- | --- | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| [OneAPM](https://www.oneapm.com/) | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| [QINGCLOUD](https://www.qingcloud.com/) | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| [Rambler](rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| [Traffic Stars](https://trafficstars.com/) | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | | [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| [scireum GmbH](https://www.scireum.de/) | e-Commerce | ??? 
| — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| [SEMrush](https://www.semrush.com/) | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| [scireum GmbH](https://www.scireum.de/) | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| [Sentry](https://sentry.io/) | Software developer | Backend for product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| [Sina](http://english.sina.com/index.html) | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| [SMI2](https://smi2.ru/) | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | | [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | + Нераспознанный китайский источник https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf | [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/ml.pdf) | +| [VKontakte](vk.com) | Social Network | Statistics, Logging | — | — | [Slides in 
Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | | [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | | [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | | [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | @@ -57,6 +76,19 @@ https://blockchair.com/ - https://www.octonica.ru/ презентация https://github.com/ClickHouse/clickhouse-presentations/blob/master/database_saturday_2018_2/octonica/meetup.pptx +Yandex.Mail is using ClickHouse to record user activity logs in a web interface for investigations and analytics. + +Yandex.Browser is using ClickHouse for performance histograms. Browsers send many mini-histograms from clients. +They are all stored into ClickHouse for the purpose of Browser version comparisons, analytics and investigations. + +- Infinidat https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/clickhouse_in_the_world.pptx +- Vertamedia +- NVidia +- Percona https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/clickhouse_in_the_world.pptx Но это не компания, это разработчик тулзов для OpenSource. https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup14/analyzing_mysql_logs.pptx +- BI Tableau https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/tableau.pptx December 2017? это дата загрузки презентации. 
+- https://my.com/ @mail.ru https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/target.pdf +- [Эрливидео](https://flussonic.ru/) https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/erlyvideo.key +- mipt https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/mipt.pdf ### Stash @@ -81,6 +113,15 @@ https://blockchair.com/ - ByteDance, единственное прямое доказательство было ссылкой на уже не существующую вакансию. +- Booking.com. Экспериментальная поддержка ClickHouse в https://github.com/bookingcom/carbonapi + +- [Roistat](https://roistat.com) разработала [Go ClickHouse Connector](https://github.com/roistat/go-clickhouse) и поддерживает свой [Docker](https://github.com/roistat/docker-clickhouse). + +- JD.com ClickHouse Meetup in Beijing 2019 +- KuaiShou ClickHouse Meetup in Beijing 2018 + +- JetBrains DataGrip ? https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/6_datagrip.pdf + [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) @@ -96,30 +137,30 @@ https://blockchair.com/ + database_saturday_2018_2/octonica - database_saturday_2019 + dataops_2019 (CARTO, Mercadona, Zara, Idealista, Corunet, ... Cloudflare, Spotify, Amadeus, Bloomberg, Cisco, Deutsche Bank, Tencent, ByteDance) -drafts yandex/ClickHouse -> ClickHouse/ClickHouse, part 2 5 months ago -evolution Correction on KuaiShou company name 3 months ago -group_by Changed tabs to spaces in code [#CLICKHOUSE-3]. 
3 years ago -hash_tables yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -highload2016 Added historical presentation from HighLoad 2016 11 days ago -highload2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -highload2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -highload2019 Added presentation from HighLoad++ 2019 4 months ago -highload_siberia_2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -highload_siberia_2019 Added another presentation from HighLoad Siberia 2019 8 months ago -highload_spb_2019 Added presentation from Saint Highload 2019 11 months ago -hse_2019 Added one more presentation from HSE 8 months ago -internals add sources for the internals presentation 2 years ago -it_rzn_2019 Updated presentation from IT Rzn 3 months ago -meetup10 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup11 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup12 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup13 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup14 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup15 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup16 Added all presentations from Summer Berlin Meetup 2 years ago -meetup17 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup18 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup19 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago ++ drafts (CloudFlare, Booking.com, Crobox, Rambler, QRator, СКБ Контур, Roistat, SMI2) ++ evolution (ByteDance, Sina, Tencent, JD.com, KuaiShou) +- group_by +- hash_tables +- highload2016 +- highload2017 +- highload2018 +- highload2019 +- highload_siberia_2018 +- highload_siberia_2019 +- highload_spb_2019 +- hse_2019 +- internals +- it_rzn_2019 ++ meetup10 ++ meetup11 ++ meetup12 ++ meetup13 +- meetup14 ++ meetup15 +- meetup16 ++ 
meetup17 ++ meetup18 ++ meetup19 meetup20 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago meetup21 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago meetup22 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago From 63423a5162d4146696a3c8f973428a4c804877d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 15:27:32 +0300 Subject: [PATCH 131/215] Fixed absolutely wrong documentation for parallel INSERT SELECT --- dbms/src/Core/Settings.h | 2 +- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 30506359f3b..ffc575e1eb6 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -53,7 +53,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \ - M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. By default, it is determined automatically.", 0) \ + M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \ M(SettingMaxThreads, max_threads, 0, "The maximum number of threads to execute the request. 
By default, it is determined automatically.", 0) \ M(SettingMaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. By default, it is determined automatically.", 0) \ M(SettingUInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index bab361ab52d..710b0c6b28f 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -117,7 +117,7 @@ BlockIO InterpreterInsertQuery::execute() /// Passing 1 as subquery_depth will disable limiting size of intermediate result. InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - if (table->supportsParallelInsert() && settings.max_insert_threads > 0) + if (table->supportsParallelInsert() && settings.max_insert_threads > 1) { in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline); out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size()); From d6b7f81866a38e69b22ecb0afdc771df459a0a08 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 15:46:05 +0300 Subject: [PATCH 132/215] Attempt to fix flacky test --- ...ookeeper_test_alter_compression_codecs.sql | 88 ++++++++++--------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql b/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql index 9a27429d180..c0cb61421e5 100644 --- a/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql +++ b/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql @@ -1,65 +1,67 @@ SET send_logs_level = 'none'; -DROP TABLE IF EXISTS test.alter_compression_codec1; -DROP TABLE IF 
EXISTS test.alter_compression_codec2; +DROP TABLE IF EXISTS alter_compression_codec1; +DROP TABLE IF EXISTS alter_compression_codec2; -CREATE TABLE test.alter_compression_codec1 ( +CREATE TABLE alter_compression_codec1 ( somedate Date CODEC(LZ4), id UInt64 CODEC(NONE) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_compression_codecs', '1') PARTITION BY somedate ORDER BY id; -CREATE TABLE test.alter_compression_codec2 ( +CREATE TABLE alter_compression_codec2 ( somedate Date CODEC(LZ4), id UInt64 CODEC(NONE) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_compression_codecs', '2') PARTITION BY somedate ORDER BY id; -INSERT INTO test.alter_compression_codec1 VALUES('2018-01-01', 1); -INSERT INTO test.alter_compression_codec1 VALUES('2018-01-01', 2); -SYSTEM SYNC REPLICA test.alter_compression_codec2; +INSERT INTO alter_compression_codec1 VALUES('2018-01-01', 1); +INSERT INTO alter_compression_codec1 VALUES('2018-01-01', 2); +SYSTEM SYNC REPLICA alter_compression_codec2; -SELECT * FROM test.alter_compression_codec1 ORDER BY id; -SELECT * FROM test.alter_compression_codec2 ORDER BY id; +SELECT * FROM alter_compression_codec1 ORDER BY id; +SELECT * FROM alter_compression_codec2 ORDER BY id; -ALTER TABLE test.alter_compression_codec1 ADD COLUMN alter_column String DEFAULT 'default_value' CODEC(ZSTD); -SYSTEM SYNC REPLICA test.alter_compression_codec2; +ALTER TABLE alter_compression_codec1 ADD COLUMN alter_column String DEFAULT 'default_value' CODEC(ZSTD); +SYSTEM SYNC REPLICA alter_compression_codec1; +SYSTEM SYNC REPLICA alter_compression_codec2; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec1' AND name = 'alter_column'; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec2' AND name = 'alter_column'; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec1' AND name = 'alter_column'; +SELECT 
compression_codec FROM system.columns WHERE table = 'alter_compression_codec2' AND name = 'alter_column'; -INSERT INTO test.alter_compression_codec1 VALUES('2018-01-01', 3, '3'); -INSERT INTO test.alter_compression_codec1 VALUES('2018-01-01', 4, '4'); -SYSTEM SYNC REPLICA test.alter_compression_codec2; +INSERT INTO alter_compression_codec1 VALUES('2018-01-01', 3, '3'); +INSERT INTO alter_compression_codec1 VALUES('2018-01-01', 4, '4'); +SYSTEM SYNC REPLICA alter_compression_codec1; +SYSTEM SYNC REPLICA alter_compression_codec2; -SELECT * FROM test.alter_compression_codec1 ORDER BY id; -SELECT * FROM test.alter_compression_codec2 ORDER BY id; +SELECT * FROM alter_compression_codec1 ORDER BY id; +SELECT * FROM alter_compression_codec2 ORDER BY id; -ALTER TABLE test.alter_compression_codec1 MODIFY COLUMN alter_column CODEC(NONE); -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec1' AND name = 'alter_column'; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec2' AND name = 'alter_column'; +ALTER TABLE alter_compression_codec1 MODIFY COLUMN alter_column CODEC(NONE); +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec1' AND name = 'alter_column'; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec2' AND name = 'alter_column'; -INSERT INTO test.alter_compression_codec2 VALUES('2018-01-01', 5, '5'); -INSERT INTO test.alter_compression_codec2 VALUES('2018-01-01', 6, '6'); -SYSTEM SYNC REPLICA test.alter_compression_codec1; -SELECT * FROM test.alter_compression_codec1 ORDER BY id; -SELECT * FROM test.alter_compression_codec2 ORDER BY id; +INSERT INTO alter_compression_codec2 VALUES('2018-01-01', 5, '5'); +INSERT INTO alter_compression_codec2 VALUES('2018-01-01', 6, '6'); +SYSTEM SYNC REPLICA alter_compression_codec1; +SELECT * FROM alter_compression_codec1 ORDER BY id; +SELECT * FROM 
alter_compression_codec2 ORDER BY id; -ALTER TABLE test.alter_compression_codec1 MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, NONE); -SYSTEM SYNC REPLICA test.alter_compression_codec1; -SYSTEM SYNC REPLICA test.alter_compression_codec2; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec1' AND name = 'alter_column'; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec2' AND name = 'alter_column'; +ALTER TABLE alter_compression_codec1 MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, NONE); +SYSTEM SYNC REPLICA alter_compression_codec1; +SYSTEM SYNC REPLICA alter_compression_codec2; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec1' AND name = 'alter_column'; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec2' AND name = 'alter_column'; -INSERT INTO test.alter_compression_codec1 VALUES('2018-01-01', 7, '7'); -INSERT INTO test.alter_compression_codec2 VALUES('2018-01-01', 8, '8'); -SYSTEM SYNC REPLICA test.alter_compression_codec2; -SYSTEM SYNC REPLICA test.alter_compression_codec1; -SELECT * FROM test.alter_compression_codec1 ORDER BY id; -SELECT * FROM test.alter_compression_codec2 ORDER BY id; +INSERT INTO alter_compression_codec1 VALUES('2018-01-01', 7, '7'); +INSERT INTO alter_compression_codec2 VALUES('2018-01-01', 8, '8'); +SYSTEM SYNC REPLICA alter_compression_codec2; +SYSTEM SYNC REPLICA alter_compression_codec1; +SELECT * FROM alter_compression_codec1 ORDER BY id; +SELECT * FROM alter_compression_codec2 ORDER BY id; -ALTER TABLE test.alter_compression_codec1 MODIFY COLUMN alter_column FixedString(100); -SYSTEM SYNC REPLICA test.alter_compression_codec2; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND table = 'alter_compression_codec1' AND name = 'alter_column'; -SELECT compression_codec FROM system.columns WHERE database = 'test' AND 
table = 'alter_compression_codec2' AND name = 'alter_column'; +ALTER TABLE alter_compression_codec1 MODIFY COLUMN alter_column FixedString(100); +SYSTEM SYNC REPLICA alter_compression_codec2; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec1' AND name = 'alter_column'; +SELECT compression_codec FROM system.columns WHERE table = 'alter_compression_codec2' AND name = 'alter_column'; -DROP TABLE IF EXISTS test.alter_compression_codec1; -DROP TABLE IF EXISTS test.alter_compression_codec2; +DROP TABLE IF EXISTS alter_compression_codec1; +DROP TABLE IF EXISTS alter_compression_codec2; From bdabcccc7b673f5317da20a7a3b4fbe7132ffb79 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 16:12:00 +0300 Subject: [PATCH 133/215] Retry when we should --- dbms/programs/client/Client.cpp | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index be8ee14b78d..963b3251084 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -105,6 +105,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED; extern const int INVALID_USAGE_OF_INPUT; + extern const int DEADLOCK_AVOIDED; } @@ -906,9 +907,34 @@ private: query = serializeAST(*parsed_query); } - connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); - sendExternalTables(); - receiveResult(); + static constexpr size_t max_retries = 10; + for (size_t retry = 0; retry < max_retries; ++retry) + { + try + { + connection->sendQuery( + connection_parameters.timeouts, + query, + query_id, + QueryProcessingStage::Complete, + &context.getSettingsRef(), + nullptr, + true); + + sendExternalTables(); + receiveResult(); + + break; + } + catch (const Exception & e) + { + /// Retry when the server said 
"Client should retry" and no rows has been received yet. + if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && retry + 1 < max_retries) + continue; + + throw; + } + } } From a4573ab7df490a88e7f2cf38721dc95d4bf8f492 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 17:55:53 +0300 Subject: [PATCH 134/215] Quote some identifiers when formatting SQL queries. (#9142) * Added test * Different way to fix the issue * Different way to fix the issue * Descend instead of syntax error while parsing INTERVAL operators * Remove old comment --- dbms/src/IO/WriteHelpers.cpp | 39 +++++++++++++++++++ dbms/src/IO/WriteHelpers.h | 38 ++---------------- dbms/src/Parsers/ExpressionListParsers.cpp | 14 +++++-- .../01081_keywords_formatting.reference | 1 + .../0_stateless/01081_keywords_formatting.sql | 1 + 5 files changed, 56 insertions(+), 37 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01081_keywords_formatting.reference create mode 100644 dbms/tests/queries/0_stateless/01081_keywords_formatting.sql diff --git a/dbms/src/IO/WriteHelpers.cpp b/dbms/src/IO/WriteHelpers.cpp index d2605dce9fe..9fe194a70c8 100644 --- a/dbms/src/IO/WriteHelpers.cpp +++ b/dbms/src/IO/WriteHelpers.cpp @@ -62,4 +62,43 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac bool has_nested = false; writeBinary(has_nested, buf); } + + +/// The same, but quotes apply only if there are characters that do not match the identifier without quotes +template +static inline void writeProbablyQuotedStringImpl(const StringRef & s, WriteBuffer & buf, F && write_quoted_string) +{ + if (!s.size || !isValidIdentifierBegin(s.data[0])) + { + write_quoted_string(s, buf); + } + else + { + const char * pos = s.data + 1; + const char * end = s.data + s.size; + for (; pos < end; ++pos) + if (!isWordCharASCII(*pos)) + break; + if (pos != end) + write_quoted_string(s, buf); + else + writeString(s, buf); + } +} + +void 
writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf) +{ + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); }); +} + +void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf) +{ + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); }); +} + +void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf) +{ + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); }); +} + } diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 0c7478c9177..aaba21a008a 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -509,40 +509,10 @@ inline void writeBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf) } -/// The same, but quotes apply only if there are characters that do not match the identifier without quotes. 
-template -inline void writeProbablyQuotedStringImpl(const StringRef & s, WriteBuffer & buf, F && write_quoted_string) -{ - if (!s.size || !isValidIdentifierBegin(s.data[0])) - write_quoted_string(s, buf); - else - { - const char * pos = s.data + 1; - const char * end = s.data + s.size; - for (; pos < end; ++pos) - if (!isWordCharASCII(*pos)) - break; - if (pos != end) - write_quoted_string(s, buf); - else - writeString(s, buf); - } -} - -inline void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf) -{ - writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); }); -} - -inline void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf) -{ - writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); }); -} - -inline void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf) -{ - writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); }); -} +/// Write quoted if the string doesn't look like and identifier. +void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf); +void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf); +void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf); /** Outputs the string in for the CSV format. diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index cf755994cd6..f8947262a26 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -601,18 +601,26 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - /// If no INTERVAL keyword, go to nested parser. 
+ auto begin = pos; + + /// If no INTERVAL keyword, go to the nested parser. if (!ParserKeyword("INTERVAL").ignore(pos, expected)) return next_parser.parse(pos, node, expected); ASTPtr expr; /// Any expression can be inside, because operator surrounds it. if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) - return false; + { + pos = begin; + return next_parser.parse(pos, node, expected); + } IntervalKind interval_kind; if (!parseIntervalKind(pos, expected, interval_kind)) - return false; + { + pos = begin; + return next_parser.parse(pos, node, expected); + } /// the function corresponding to the operator auto function = std::make_shared(); diff --git a/dbms/tests/queries/0_stateless/01081_keywords_formatting.reference b/dbms/tests/queries/0_stateless/01081_keywords_formatting.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01081_keywords_formatting.reference @@ -0,0 +1 @@ +2 diff --git a/dbms/tests/queries/0_stateless/01081_keywords_formatting.sql b/dbms/tests/queries/0_stateless/01081_keywords_formatting.sql new file mode 100644 index 00000000000..6044f383c10 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01081_keywords_formatting.sql @@ -0,0 +1 @@ +SELECT (1 AS `interval`) + `interval`; From 2acd6afe0b51319d98779bdab3ba03118b368b8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 18:08:25 +0300 Subject: [PATCH 135/215] Fixed build --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index af7cd742299..a7ec11ee2cb 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -214,7 +214,7 @@ private: } /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. 
- std::chrono::steady_clock::duration closeSessions(std::unique_lock & lock) + std::chrono::steady_clock::duration closeSessions(std::unique_lock &) { const auto now = std::chrono::steady_clock::now(); From 41ce2ad5b8e8490a18bc9e4f84b6313de6986e05 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 18:22:47 +0300 Subject: [PATCH 136/215] Update 00910_zookeeper_test_alter_compression_codecs.sql --- .../00910_zookeeper_test_alter_compression_codecs.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql b/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql index c0cb61421e5..d9ca0595fa5 100644 --- a/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql +++ b/dbms/tests/queries/0_stateless/00910_zookeeper_test_alter_compression_codecs.sql @@ -1,4 +1,5 @@ SET send_logs_level = 'none'; +SET replication_alter_partitions_sync = 2; DROP TABLE IF EXISTS alter_compression_codec1; DROP TABLE IF EXISTS alter_compression_codec2; From 8587d47c5cc4785fdaf24ed64a803cd897a1dd7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 18:33:34 +0300 Subject: [PATCH 137/215] Fixed build --- dbms/src/Interpreters/Context.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 94b8933e3b9..d67fa3fb9c0 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -599,8 +599,6 @@ public: void dropCompiledExpressionCache() const; #endif - void createSessionCleaner(); - /// Add started bridge command. 
It will be killed after context destruction void addXDBCBridgeCommand(std::unique_ptr cmd) const; From 92f84c88393d86de8eb7532b1f9b828dad7f6b34 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 18:37:47 +0300 Subject: [PATCH 138/215] Update TableFunctionGenerate.h --- dbms/src/TableFunctions/TableFunctionGenerate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.h b/dbms/src/TableFunctions/TableFunctionGenerate.h index 0002acd54b1..e4c7f9beeba 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.h +++ b/dbms/src/TableFunctions/TableFunctionGenerate.h @@ -4,7 +4,7 @@ namespace DB { -/* random(structure(, max_array_length, max_string_length, random_seed)) - creates a temporary storage that generates columns with random data +/* generate(structure, [max_array_length, max_string_length, random_seed]) - creates a temporary storage that generates columns with random data */ class TableFunctionGenerate : public ITableFunction { From adcf1735d6c92b84de02e51d63bf66d8c359d7ba Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 18:39:03 +0300 Subject: [PATCH 139/215] Update TableFunctionGenerate.cpp --- dbms/src/TableFunctions/TableFunctionGenerate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.cpp b/dbms/src/TableFunctions/TableFunctionGenerate.cpp index fe3ae5f33df..c68206c3d51 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerate.cpp @@ -35,12 +35,12 @@ StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const ASTs & args = args_func.at(0)->children; if (args.size() < 1) - throw Exception("Table function '" + getName() + "' requires at least one argument: "\ + throw Exception("Table function '" + getName() + "' requires at least one argument: " " structure(, max_array_length, max_string_length, random_seed).", 
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (args.size() > 4) - throw Exception("Table function '" + getName() + "' requires at most four arguments: "\ + throw Exception("Table function '" + getName() + "' requires at most four arguments: " " structure, max_array_length, max_string_length, random_seed.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); From dd13e0353a615e5ccb6325d1b741c105fb3f1518 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 18:40:59 +0300 Subject: [PATCH 140/215] Fixed build --- dbms/programs/server/Server.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 5ab73a93e7a..4410d6ea696 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -1020,8 +1020,6 @@ int Server::main(const std::vector & /*args*/) global_context->getConfigRef(), graphite_key, async_metrics)); } - global_context->createSessionCleaner(); - waitForTerminationRequest(); } From 6b88c87cdc026f94ba122bc4a36d0739bd4dfad4 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 5 Mar 2020 19:55:42 +0200 Subject: [PATCH 141/215] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 1014e3f059f..784ca00f5cc 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,3 @@ ClickHouse is an open-source column-oriented database management system that all * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person. -## Upcoming Events - -* [ClickHouse Meetup in Athens](https://www.meetup.com/Athens-Big-Data/events/268379195/) on March 5. 
From 678cc00d30985271c8d685ec85410847025ea971 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 21:01:36 +0300 Subject: [PATCH 142/215] Update StorageGenerate.cpp --- dbms/src/Storages/StorageGenerate.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp index ed9a31a5e03..bb306d2542f 100644 --- a/dbms/src/Storages/StorageGenerate.cpp +++ b/dbms/src/Storages/StorageGenerate.cpp @@ -37,10 +37,9 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 limit, - UInt64 max_array_length, UInt64 max_string_length, pcg32& generator, pcg64_fast& generator64) + UInt64 max_array_length, UInt64 max_string_length, pcg32 & generator, pcg64_fast & generator64) { TypeIndex idx = type->getTypeId(); - (void) max_string_length; switch (idx) { @@ -214,11 +213,11 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l { UInt32 r = generator(); chars[i] = 32 + (r & 0x7F) % 95; - chars[i+1] = 32 + ((r >> 7) & 0x7F) % 95; - chars[i+2] = 32 + ((r >> 14) & 0x7F) % 95; - chars[i+3] = 32 + ((r >> 21) & 0x7F) % 95; - chars[i+4] = 32 + (r >> 28); - i+=4; + chars[i + 1] = 32 + ((r >> 7) & 0x7F) % 95; + chars[i + 2] = 32 + ((r >> 14) & 0x7F) % 95; + chars[i + 3] = 32 + ((r >> 21) & 0x7F) % 95; + chars[i + 4] = 32 + (r >> 28); + i += 4; } else { @@ -229,7 +228,7 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l // add terminating zero char for (auto & i : offsets) { - chars[i-1] = 0; + chars[i - 1] = 0; } } break; @@ -237,7 +236,7 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l case TypeIndex::FixedString: { auto & column_string = typeid_cast(column); - size_t len = column_string.sizeOfValueIfFixed(); + const size_t len = column_string.sizeOfValueIfFixed(); auto & chars = 
column_string.getChars(); UInt64 num_chars = static_cast(len) * limit; @@ -310,8 +309,8 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l Int128 x = static_cast(generator64()) << 64 | static_cast(generator64()); data[i] = x; } - } break; + } case TypeIndex::UUID: { auto & data = typeid_cast &>(column).getData(); @@ -323,8 +322,8 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l auto x = UInt128(a, b); data[i] = x; } - } break; + } case TypeIndex::Array: { auto & column_array = typeid_cast(column); @@ -373,7 +372,7 @@ void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 l null_map.resize(limit); for (UInt64 i = 0; i < limit; ++i) { - null_map[i] = generator() < 1024; + null_map[i] = generator() < 1024; /// No real motivation for this. } break; } From c4f4e6b97e0cf07ce7b2a01d637183bc189489b0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 21:24:31 +0300 Subject: [PATCH 143/215] Added a test from Michael Filimonov --- .../01093_cyclic_defaults_filimonov.reference | 0 .../01093_cyclic_defaults_filimonov.sql | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.reference create mode 100644 dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql diff --git a/dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.reference b/dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql b/dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql new file mode 100644 index 00000000000..548cd794ba3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql @@ -0,0 +1,19 @@ +CREATE TABLE test +( + `a1` UInt64 DEFAULT a + 1, + `a1` UInt64 DEFAULT a + 1, + `a2` UInt64 DEFAULT a3 + 
a4, + `a3` UInt64 DEFAULT a2 + 1, + `a4` UInt64 ALIAS a3 + 1 +) +ENGINE = Log; -- { serverError 174 } + +CREATE TABLE pythagoras +( + `a` Float64 DEFAULT sqrt((c * c) - (b * b)), + `b` Float64 DEFAULT sqrt((c * c) - (a * a)), + `c` Float64 DEFAULT sqrt((a * a) + (b * b)) +) +ENGINE = Log; -- { serverError 174 } + +-- TODO: It works but should not: CREATE TABLE test (a DEFAULT b, b DEFAULT a) ENGINE = Memory From 0b063b8db2944ad0ad33e5dbf9a3c60b41c637a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 21:57:10 +0300 Subject: [PATCH 144/215] Added results for AMD EPYC --- website/benchmark_hardware.html | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index b3b3a3e6d57..c5801e289f2 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -2172,6 +2172,57 @@ var results = [0.033, 0.025, 0.011] ] }, + + { + "system": "AMD EPYC 7502P / 128G DDR4 / 2NVME SAMSUNG MZQLB960HAJR", + "time": "2020-03-05 00:00:00", + "result": + [ +[0.012, 0.019, 0.009], +[0.042, 0.026, 0.038], +[0.026, 0.032, 0.017], +[0.058, 0.025, 0.027], +[0.095, 0.080, 0.087], +[0.143, 0.125, 0.124], +[0.018, 0.010, 0.016], +[0.013, 0.012, 0.013], +[0.201, 0.182, 0.182], +[0.228, 0.204, 0.204], +[0.093, 0.078, 0.077], +[0.100, 0.080, 0.081], +[0.241, 0.222, 0.218], +[0.291, 0.265, 0.270], +[0.268, 0.254, 0.256], +[0.255, 0.241, 0.242], +[0.623, 0.593, 0.599], +[0.373, 0.343, 0.339], +[1.354, 1.318, 1.311], +[0.054, 0.020, 0.022], +[0.495, 0.247, 0.242], +[0.520, 0.258, 0.248], +[0.957, 0.646, 0.652], +[null, null, null], +[0.149, 0.105, 0.099], +[0.091, 0.070, 0.069], +[0.150, 0.096, 0.094], +[0.499, 0.315, 0.309], +[0.437, 0.354, 0.357], +[1.002, 0.996, 0.991], +[0.234, 0.205, 0.207], +[0.380, 0.305, 0.305], +[1.733, 1.651, 1.655], +[1.230, 1.134, 1.132], +[1.217, 1.130, 1.114], +[0.396, 0.385, 0.383], +[0.156, 0.148, 0.160], +[0.065, 0.062, 0.063], 
+[0.057, 0.052, 0.052], +[0.368, 0.342, 0.336], +[0.030, 0.025, 0.027], +[0.022, 0.017, 0.019], +[0.005, 0.004, 0.004] + ] + }, ]; @@ -2602,6 +2653,7 @@ Results for AWS are from Wolf Kreuzerkrieg.
Results for Huawei Taishan are from Peng Gao in sina.com.
Results for Selectel and AMD EPYC 7402P are from Andrey Dudin.
Results for ProLiant are from Denis Ustinov.
+Results for AMD EPYC 7502P are from Kostiantyn Velychkovskyi.
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.
Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.
From 3b85f2ffef0f6915da7e3fdc532a6ffc339e57c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 22:23:39 +0300 Subject: [PATCH 145/215] Do not run session cleaner for client, local --- dbms/programs/server/HTTPHandler.cpp | 4 +-- dbms/programs/server/Server.cpp | 2 ++ dbms/src/Interpreters/Context.cpp | 39 ++++++++++++++++++---------- dbms/src/Interpreters/Context.h | 22 +++++++++------- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 7eafe00fdd6..a4c59ff9e25 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -273,7 +273,7 @@ void HTTPHandler::processQuery( /// The user could specify session identifier and session timeout. /// It allows to modify settings, create temporary tables and reuse them in subsequent requests. - std::shared_ptr session; + std::shared_ptr session; String session_id; std::chrono::steady_clock::duration session_timeout; bool session_is_set = params.has("session_id"); @@ -285,7 +285,7 @@ void HTTPHandler::processQuery( session_timeout = parseSessionTimeout(config, params); std::string session_check = params.get("session_check", ""); - session = context.acquireSession(session_id, session_timeout, session_check == "1"); + session = context.acquireNamedSession(session_id, session_timeout, session_check == "1"); context = session->context; context.setSessionContext(session->context); diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 4410d6ea696..636911c71ca 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -908,6 +908,8 @@ int Server::main(const std::vector & /*args*/) if (servers.empty()) throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + global_context->enableNamedSessions(); + for (auto & server : servers) 
server->start(); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a7ec11ee2cb..62f2ceb32cb 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -96,12 +96,12 @@ namespace ErrorCodes } -class Sessions +class NamedSessions { public: - using Key = SessionKey; + using Key = NamedSessionKey; - ~Sessions() + ~NamedSessions() { try { @@ -120,7 +120,11 @@ public: } /// Find existing session or create a new. - std::shared_ptr acquireSession(const String & session_id, Context & context, std::chrono::steady_clock::duration timeout, bool throw_if_not_found) + std::shared_ptr acquireSession( + const String & session_id, + Context & context, + std::chrono::steady_clock::duration timeout, + bool throw_if_not_found) { std::unique_lock lock(mutex); @@ -138,7 +142,7 @@ public: throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); /// Create a new session from current context. - it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; + it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; } else if (it->second->key.first != context.client_info.current_user) { @@ -154,7 +158,7 @@ public: return session; } - void releaseSession(Session & session) + void releaseSession(NamedSession & session) { std::unique_lock lock(mutex); scheduleCloseSession(session, lock); @@ -173,7 +177,7 @@ private: } }; - using Container = std::unordered_map, SessionKeyHash>; + using Container = std::unordered_map, SessionKeyHash>; using CloseTimes = std::deque>; Container sessions; CloseTimes close_times; @@ -181,7 +185,7 @@ private: std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); UInt64 close_cycle = 0; - void scheduleCloseSession(Session & session, std::unique_lock &) + void scheduleCloseSession(NamedSession & session, std::unique_lock &) { /// Push it on a queue of sessions to close, on a 
position corresponding to the timeout. /// (timeout is measured from current moment of time) @@ -247,11 +251,11 @@ private: std::mutex mutex; std::condition_variable cond; std::atomic quit{false}; - ThreadFromGlobalPool thread{&Sessions::cleanThread, this}; + ThreadFromGlobalPool thread{&NamedSessions::cleanThread, this}; }; -void Session::release() +void NamedSession::release() { parent.releaseSession(*this); } @@ -326,8 +330,7 @@ struct ContextShared RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml std::optional trace_collector; /// Thread collecting traces from threads executing queries - - Sessions sessions; /// Controls named HTTP sessions. + std::optional named_sessions; /// Controls named HTTP sessions. /// Clusters for distributed tables /// Initialized on demand (on distributed storages initialization) since Settings should be initialized @@ -501,9 +504,17 @@ Databases Context::getDatabases() } -std::shared_ptr Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) +void Context::enableNamedSessions() { - return shared->sessions.acquireSession(session_id, *this, timeout, session_check); + shared->named_sessions.emplace(); +} + +std::shared_ptr Context::acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) +{ + if (!shared->named_sessions) + throw Exception("Support for named sessions is not enabled", ErrorCodes::NOT_IMPLEMENTED); + + return shared->named_sessions->acquireSession(session_id, *this, timeout, session_check); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index d67fa3fb9c0..5b5b8bdabd5 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -103,7 +103,7 @@ class IOutputFormat; using OutputFormatPtr = std::shared_ptr; class Volume; using VolumePtr = std::shared_ptr; -struct Session; +struct NamedSession; #if USE_EMBEDDED_COMPILER @@ -180,7 
+180,7 @@ private: Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context. Could be equal to this. - friend class Sessions; + friend class NamedSessions; using SampleBlockCache = std::unordered_map; mutable SampleBlockCache sample_block_cache; @@ -422,7 +422,11 @@ public: const Databases getDatabases() const; Databases getDatabases(); - std::shared_ptr acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); + /// Allow to use named sessions. The thread will be run to cleanup sessions after timeout has expired. + /// The method must be called at the server startup. + void enableNamedSessions(); + + std::shared_ptr acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); /// For methods below you may need to acquire a lock by yourself. std::unique_lock getLock() const; @@ -659,21 +663,21 @@ private: }; -class Sessions; +class NamedSessions; /// User name and session identifier. Named sessions are local to users. -using SessionKey = std::pair; +using NamedSessionKey = std::pair; /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. 
-struct Session +struct NamedSession { - SessionKey key; + NamedSessionKey key; UInt64 close_cycle = 0; Context context; std::chrono::steady_clock::duration timeout; - Sessions & parent; + NamedSessions & parent; - Session(SessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, Sessions & parent_) + NamedSession(NamedSessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, NamedSessions & parent_) : key(key_), context(context_), timeout(timeout_), parent(parent_) { } From 3cad6825835afa5e4a82e2e2c7d96cd3f38364dc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 22:25:54 +0300 Subject: [PATCH 146/215] Better exception message while loading tables for database ordinary --- dbms/src/Databases/DatabaseOrdinary.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 9256466de75..ce09c739e22 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -51,6 +51,7 @@ namespace const ASTCreateQuery & query, DatabaseOrdinary & database, const String & database_name, + const String & metadata_path, bool has_force_restore_data_flag) { assert(!query.is_dictionary); @@ -64,7 +65,9 @@ namespace } catch (Exception & e) { - e.addMessage("Cannot attach table '" + backQuote(query.table) + "' from query " + serializeAST(query)); + e.addMessage("Cannot attach table " + backQuote(database_name) + "." + backQuote(query.table) + + " from metadata file " + metadata_path) + + " from query " + serializeAST(query); throw; } } @@ -110,7 +113,6 @@ void DatabaseOrdinary::loadStoredObjects( Context & context, bool has_force_restore_data_flag) { - /** Tables load faster if they are loaded in sorted (by name) order. 
* Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, * which does not correspond to order tables creation and does not correspond to order of their location on disk. @@ -124,7 +126,7 @@ void DatabaseOrdinary::loadStoredObjects( String full_path = getMetadataPath() + file_name; try { - auto ast = parseQueryFromMetadata(context, full_path, /*throw_on_error*/ true, /*remove_empty*/false); + auto ast = parseQueryFromMetadata(context, full_path, /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { auto * create_query = ast->as(); @@ -157,7 +159,7 @@ void DatabaseOrdinary::loadStoredObjects( if (!create_query.is_dictionary) pool.scheduleOrThrowOnError([&]() { - tryAttachTable(context, create_query, *this, getDatabaseName(), has_force_restore_data_flag); + tryAttachTable(context, create_query, *this, getDatabaseName(), name_with_query.first, has_force_restore_data_flag); /// Messages, so that it's not boring to wait for the server to load for a long time. logAboutProgress(log, ++tables_processed, total_tables, watch); From 66f8e9901db556d2dae4569140275c270ff24466 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Mar 2020 22:27:36 +0300 Subject: [PATCH 147/215] Update DatabaseOrdinary.cpp --- dbms/src/Databases/DatabaseOrdinary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index ce09c739e22..33b39ed4853 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -66,8 +66,8 @@ namespace catch (Exception & e) { e.addMessage("Cannot attach table " + backQuote(database_name) + "." 
+ backQuote(query.table) - + " from metadata file " + metadata_path) - + " from query " + serializeAST(query); + + " from metadata file " + metadata_path + + " from query " + serializeAST(query)); throw; } } From 67d202a5f01396ad7f5a36c147f8ab4095850a1e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 22:30:08 +0300 Subject: [PATCH 148/215] Addition to prev. revision --- dbms/src/Databases/DatabaseOrdinary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index ce09c739e22..f97fc213380 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -159,7 +159,7 @@ void DatabaseOrdinary::loadStoredObjects( if (!create_query.is_dictionary) pool.scheduleOrThrowOnError([&]() { - tryAttachTable(context, create_query, *this, getDatabaseName(), name_with_query.first, has_force_restore_data_flag); + tryAttachTable(context, create_query, *this, getDatabaseName(), getMetadataPath() + name_with_query.first, has_force_restore_data_flag); /// Messages, so that it's not boring to wait for the server to load for a long time. 
logAboutProgress(log, ++tables_processed, total_tables, watch); From 5696806426714c45d95b3f27458dde0fa7852e84 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 22:55:39 +0300 Subject: [PATCH 149/215] Translate a comment in mysqlxx #4904 --- base/mysqlxx/include/mysqlxx/Pool.h | 2 +- base/mysqlxx/include/mysqlxx/Value.h | 45 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/base/mysqlxx/include/mysqlxx/Pool.h b/base/mysqlxx/include/mysqlxx/Pool.h index db41b059357..b2b04f3bdfc 100644 --- a/base/mysqlxx/include/mysqlxx/Pool.h +++ b/base/mysqlxx/include/mysqlxx/Pool.h @@ -198,7 +198,7 @@ public: return description; } - void removeConnection(Connection* data); + void removeConnection(Connection * data); protected: /// Number of MySQL connections which are created at launch. diff --git a/base/mysqlxx/include/mysqlxx/Value.h b/base/mysqlxx/include/mysqlxx/Value.h index 1ca83a8d3a6..2b3465d52d1 100644 --- a/base/mysqlxx/include/mysqlxx/Value.h +++ b/base/mysqlxx/include/mysqlxx/Value.h @@ -23,26 +23,26 @@ namespace mysqlxx class ResultBase; -/** Представляет одно значение, считанное из MySQL. - * Объект сам не хранит данные, а является всего лишь обёрткой над парой (const char *, size_t). - * Если уничтожить UseQueryResult/StoreQueryResult или Connection, - * или считать следующий Row при использовании UseQueryResult, то объект станет некорректным. - * Позволяет преобразовать значение (распарсить) в различные типы данных: - * - с помощью функций вида getUInt(), getString(), ... (рекомендуется); - * - с помощью шаблонной функции get(), которая специализирована для многих типов (для шаблонного кода); - * - шаблонная функция get работает также для всех типов, у которых есть конструктор из Value - * (это сделано для возможности расширения); - * - с помощью operator Type() - но этот метод реализован лишь для совместимости и не рекомендуется - * к использованию, так как неудобен (часто возникают неоднозначности). 
+/** Represents a single value read from MySQL. + * It doesn't owns the value. It's just a wrapper of a pair (const char *, size_t). + * If the UseQueryResult/StoreQueryResult or Connection is destroyed, + * or you have read the next Row while using UseQueryResult, then the object is invalidated. + * Allows to transform (parse) the value to various data types: + * - with getUInt(), getString(), ... (recommended); + * - with template function get() that is specialized for multiple data types; + * - the template function get also works for all types that can be constructed from Value + * (it is an extension point); + * - with operator Type() - this is done for compatibility and not recommended because ambiguities possible. * - * При ошибке парсинга, выкидывается исключение. - * При попытке достать значение, которое равно nullptr, выкидывается исключение - * - используйте метод isNull() для проверки. + * On parsing error, exception is thrown. + * When trying to extract a value that is nullptr, exception is thrown + * - use isNull() method to check. * - * Во всех распространённых системах, time_t - это всего лишь typedef от Int64 или Int32. - * Для того, чтобы можно было писать row[0].get(), ожидая, что значение вида '2011-01-01 00:00:00' - * корректно распарсится согласно текущей тайм-зоне, сделано так, что метод getUInt и соответствующие методы get<>() - * также умеют парсить дату и дату-время. + * As time_t is just an alias for integer data type + * to allow to write row[0].get(), and expect that the values like '2011-01-01 00:00:00' + * will be successfully parsed according to the current time zone, + * the getUInt method and the corresponding get<>() methods + * are capable of parsing Date and DateTime. */ class Value { @@ -166,7 +166,7 @@ private: else throwException("Cannot parse DateTime"); - return 0; /// чтобы не было warning-а. + return 0; /// avoid warning. 
} @@ -184,7 +184,7 @@ private: else throwException("Cannot parse Date"); - return 0; /// чтобы не было warning-а. + return 0; /// avoid warning. } @@ -231,7 +231,7 @@ private: double readFloatText(const char * buf, size_t length) const; /// Выкинуть исключение с подробной информацией - void throwException(const char * text) const; + [[noreturn]] void throwException(const char * text) const; }; @@ -239,8 +239,7 @@ template <> inline bool Value::get() cons template <> inline char Value::get() const { return getInt(); } template <> inline signed char Value::get() const { return getInt(); } template <> inline unsigned char Value::get() const { return getUInt(); } -// crodriguez uncomment -//template <> inline char8_t Value::get() const { return getUInt(); } +template <> inline char8_t Value::get() const { return getUInt(); } template <> inline short Value::get() const { return getInt(); } template <> inline unsigned short Value::get() const { return getUInt(); } template <> inline int Value::get() const { return getInt(); } From c968e55da719304b29eaafc4a873f83f626503b1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 23:00:49 +0300 Subject: [PATCH 150/215] Miscellaneous #9409 --- base/mysqlxx/src/Pool.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/mysqlxx/src/Pool.cpp b/base/mysqlxx/src/Pool.cpp index 280c0e06276..b8216c254e6 100644 --- a/base/mysqlxx/src/Pool.cpp +++ b/base/mysqlxx/src/Pool.cpp @@ -21,8 +21,8 @@ void Pool::Entry::incrementRefCount() { if (!data) return; - ++(data->ref_count); - if (data->ref_count==1) + ++data->ref_count; + if (data->ref_count == 1) mysql_thread_init(); } @@ -32,8 +32,8 @@ void Pool::Entry::decrementRefCount() return; if (data->ref_count > 0) { - --(data->ref_count); - if (data->ref_count==0) + --data->ref_count; + if (data->ref_count == 0) mysql_thread_end(); } } From d4d48a016594ee7e2ccc8c62b3ad72f24bddcabc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 
23:02:55 +0300 Subject: [PATCH 151/215] Fix style --- dbms/src/Interpreters/Context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 62f2ceb32cb..b057cda53ea 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -93,6 +93,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int UNKNOWN_SCALAR; extern const int AUTHENTICATION_FAILED; + extern const int NOT_IMPLEMENTED; } From fa85385d34f0520e30f7ec1c87e631ab6fadcb1e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 23:11:06 +0300 Subject: [PATCH 152/215] Make "performance_introspection_and_logging" test reliable to random stucks --- .../00634_performance_introspection_and_logging.reference | 2 +- .../0_stateless/00634_performance_introspection_and_logging.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.reference b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.reference index 9f61a33663e..8c3fc467d57 100644 --- a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.reference +++ b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.reference @@ -2,4 +2,4 @@ 0 1 1 1 0 -1 1 1 +1 1 diff --git a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 1bf995a1a7e..15961eac73f 100755 --- a/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/dbms/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -63,7 +63,6 @@ $CLICKHOUSE_CLIENT $settings -q " SELECT -- max(thread_realtime), $query_elapsed, max(thread_time_user_system_io), 0.9 * $query_elapsed <= max(thread_realtime) AND max(thread_realtime) <= 1.1 * $query_elapsed, - 0.7 * $query_elapsed <= 
max(thread_time_user_system_io) AND max(thread_time_user_system_io) <= 1.3 * $query_elapsed, uniqExact(thread_id) = $threads FROM ( From 1c5c8b71311ad62f49c79acee33884575bb1e7f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Mar 2020 23:24:40 +0300 Subject: [PATCH 153/215] Fixed symlinks #9220 #9222 --- .../features_considered_disadvantages.md | 1 + docs/ja/introduction/history.md | 47 ++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) mode change 120000 => 100644 docs/ja/introduction/features_considered_disadvantages.md mode change 120000 => 100644 docs/ja/introduction/history.md diff --git a/docs/ja/introduction/features_considered_disadvantages.md b/docs/ja/introduction/features_considered_disadvantages.md deleted file mode 120000 index 9a7e9ae5758..00000000000 --- a/docs/ja/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,7 +0,0 @@ -# 欠点と考えられるClickHouseの機能 - -1. 本格的なトランザクションはありません。 -2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 -3. インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 - -[Original article](https://clickhouse.yandex/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/ja/introduction/features_considered_disadvantages.md b/docs/ja/introduction/features_considered_disadvantages.md new file mode 100644 index 00000000000..596fb9ba86f --- /dev/null +++ b/docs/ja/introduction/features_considered_disadvantages.md @@ -0,0 +1,8 @@ +# 欠点と考えられるClickHouseの機能 + +1. 本格的なトランザクションはありません。 +2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 +3. 
インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 + +[Original article](https://clickhouse.yandex/docs/en/introduction/features_considered_disadvantages/) + diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md deleted file mode 120000 index 7004e990a59..00000000000 --- a/docs/ja/introduction/history.md +++ /dev/null @@ -1 +0,0 @@ -../../en/introduction/history.md \ No newline at end of file diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md new file mode 100644 index 00000000000..fbae277542d --- /dev/null +++ b/docs/ja/introduction/history.md @@ -0,0 +1,46 @@ +# ClickHouseの歴史 + +ClickHouseは元々、 [世界で2番目に大きなWeb分析プラットフォーム ](http://w3techs.com/technologies/overview/traffic_analysis/all) である [Yandex.Metrica](https://metrica.yandex.com/)を強化するために開発されたもので、このシステムのコアコンポーネントであり続けています。データベースには13兆を超えるレコードがあり、毎日200億を超えるイベントが発生しますが、ClickHouseでは集計されていないデータから直接カスタムレポートを生成できます。この記事では、ClickHouseの開発の初期段階におけるClickHouseの目標について簡単に説明します。 + +Yandex.Metricaは、ユーザーが定義した任意のセグメントを使用して、ヒットとセッションに基づいてカスタマイズされたレポートをその都度作成します。これには、多くの場合、一意のユーザー数などの複雑な集計を作成する必要があり、レポートを作成するための新しいデータがリアルタイムで受信されます。 + +2014年4月の時点で、Yandex.Metricaは毎日約120億のイベント(ページビューとクリック)を追跡していました。カスタムレポートを作成するには、これらすべてのイベントを保存する必要があります。単一のクエリで、数百ミリ秒以内に数百万行をスキャンしたり、わずか数秒で数億行をスキャンする必要があります。 + +## Yandex.Metricaおよびその他のYandexサービスでの用途 + +ClickHouseは、Yandex.Metricaで複数の目的に使用されます。その主なタスクは、非集計データを使用してオンラインでレポートを作成することです。 374台のサーバーからなるクラスターを使用し、20.3兆行をデータベースに保存します。圧縮されたデータの量は、重複データとレプリケーションを除いて約2PBです。非圧縮データ(TSV形式)の量は約17PBにもなります。 + +ClickHouseは以下の目的にも使用されます。 + +- Yandex.Metricaのデータをセッションリプレイのために保存する。 +- 中間データを処理する。 +- Analyticsを使用したグローバルレポートの作成。 +- Yandex.Metricaエンジンをデバッグするためのクエリの実行。 +- APIおよびユーザーインターフェイスからのログの分析。 + +ClickHouseは少なくとも、そのほか12のYandexのサービス(検索分野、Market、Direct、ビジネス分析、モバイル開発、AdFox、パーソナルサービスなど)で利用されています。 + +## 集約されたデータと非集約データ + +統計を効果的に計算するには、データの量を減らすため、データを集計する必要があるという一般的な意見があります。 + +ただし次の理由により、データ集約は非常に限られた解決策です。 + +- ユーザーが必要とするレポートの事前定義リストが必要です。 +- ユーザーはカスタムレポートを作成できません。 +- 
大量のキーを集約する場合、データ量は削減されず、集約は役に立ちません。 +- 多数のレポートの場合、集計のバリエーションが多すぎます(組み合わせ爆発) +- カーディナリティの高いキー(URLなど)を集約する場合、データの量はそれほど減少しません(たかだか半分程度)。 +- このため、集約されたデータの量は減少するどころか増加する場合があります。 +- 生成した全てのレポートをユーザが見るわけではありません。計算の大部分は無駄になっています。 +- データの論理的な整合性は、さまざまな集計に対して違反する可能性があります。 + +何も集約せず、集約されていないデータを操作する場合、実際には計算量が減る可能性があります。 + +しかしながら集約は、その大部分がオフラインで実行され、比較的ゆったりと処理します。対照的に、オンライン計算では、ユーザーが結果を待っているため、できるだけ高速に計算する必要があります。 + +Yandex.Metricaには、Metrageと呼ばれるデータを集計するための特別なシステムがあり、これはほとんどのレポートで使用されています。 2009年以降、Yandex.Metricaは、以前にレポートビルダーで使用されていたOLAPServerと呼ばれる非集計データ用の特殊なOLAPデータベースも使用しました。 OLAPServerは非集計データに対してはうまく機能しましたが、多くの制限があり、必要に応じてすべてのレポートに使用することはできませんでした。制限とは、(数値のみしか扱えない)データ型サポートの欠如や、リアルタイムでデータを段階的に更新できないこと(毎日データを書き換えることによってのみ更新可能)がありました。 OLAPServerはDBMSではなく、特殊なDBだったのです。 + +OLAPServerの制限を取り除き、レポートのための非集計データを扱う問題を解決するために、私達は ClickHouse DBMSを開発しました。 + +[Original article](https://clickhouse.yandex/docs/en/introduction/history/) From 1a2122a8540e4e1c36ae829a7875c9a0171a9242 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 6 Mar 2020 00:03:59 +0300 Subject: [PATCH 154/215] Fixed wrong log messages about missing default disk or policy. 
--- dbms/src/Disks/DiskSpaceMonitor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Disks/DiskSpaceMonitor.cpp b/dbms/src/Disks/DiskSpaceMonitor.cpp index d555e8152b0..0318891fe4e 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.cpp +++ b/dbms/src/Disks/DiskSpaceMonitor.cpp @@ -59,6 +59,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig(const Poco::Util::AbstractConfigu std::shared_ptr result = std::make_shared(*this); + constexpr auto default_disk_name = "default"; std::set old_disks_minus_new_disks; for (const auto & [disk_name, _] : result->disks) { @@ -84,6 +85,8 @@ DiskSelectorPtr DiskSelector::updateFromConfig(const Poco::Util::AbstractConfigu } } + old_disks_minus_new_disks.erase(default_disk_name); + if (!old_disks_minus_new_disks.empty()) { WriteBufferFromOwnString warning; @@ -465,9 +468,11 @@ StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Uti std::shared_ptr result = std::make_shared(config, config_prefix, disks); + constexpr auto default_storage_policy_name = "default"; + for (const auto & [name, policy] : policies) { - if (result->policies.count(name) == 0) + if (name != default_storage_policy_name && result->policies.count(name) == 0) throw Exception("Storage policy " + backQuote(name) + " is missing in new configuration", ErrorCodes::BAD_ARGUMENTS); policy->checkCompatibleWith(result->policies[name]); From 5d077e0213dc70975a7579611cfc2113aadb7312 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 00:41:46 +0300 Subject: [PATCH 155/215] Fixed error --- dbms/src/Interpreters/Context.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a7011a1d4ae..25f2c2a679e 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -138,7 +138,7 @@ public: throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); /// Create a new 
session from current context. - it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; + it = sessions.emplace(key, std::make_shared(key, context, timeout, *this)).first; } else if (it->second->key.first != context.client_info.current_user) { @@ -174,6 +174,8 @@ private: }; using Container = std::unordered_map, SessionKeyHash>; + + /// TODO it's very complicated. Make simple std::map with time_t or boost::multi_index. using CloseTimes = std::deque>; Container sessions; CloseTimes close_times; @@ -236,8 +238,17 @@ private: { const auto session = sessions.find(key); - if (session != sessions.end() && session->second.unique() && session->second->close_cycle <= current_cycle) - sessions.erase(session); + if (session != sessions.end() && session->second->close_cycle <= current_cycle) + { + if (!session->second.unique()) + { + /// Skip but move it to close on the next cycle. + session->second->timeout = std::chrono::steady_clock::duration{0}; + scheduleCloseSession(*session->second, lock); + } + else + sessions.erase(session); + } } close_times.pop_front(); From a0ace3e94ad87dde64d1fdc309531cb4ec718839 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 00:50:58 +0300 Subject: [PATCH 156/215] Fixed error --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 145c9c84ae0..dbc963e0a27 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -220,7 +220,7 @@ private: } /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. 
- std::chrono::steady_clock::duration closeSessions(std::unique_lock &) + std::chrono::steady_clock::duration closeSessions(std::unique_lock & lock) { const auto now = std::chrono::steady_clock::now(); From f5518c0c439f0156812ae3f3c5bb1423c5c46d84 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 01:45:59 +0300 Subject: [PATCH 157/215] Simplification --- dbms/programs/server/HTTPHandler.cpp | 10 ++-- dbms/src/Interpreters/Context.cpp | 83 ++++++++-------------------- dbms/src/Interpreters/Context.h | 7 +-- 3 files changed, 30 insertions(+), 70 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index a4c59ff9e25..b360b0a89f4 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -141,15 +141,15 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti } -static std::chrono::steady_clock::duration parseSessionTimeout( +static uint32_t parseSessionTimeout( const Poco::Util::AbstractConfiguration & config, const HTMLForm & params) { - unsigned session_timeout = config.getInt("default_session_timeout", 60); + uint32_t session_timeout = config.getInt("default_session_timeout", 60); if (params.has("session_timeout")) { - unsigned max_session_timeout = config.getUInt("max_session_timeout", 3600); + uint32_t max_session_timeout = config.getUInt("max_session_timeout", 3600); std::string session_timeout_str = params.get("session_timeout"); ReadBufferFromString buf(session_timeout_str); @@ -162,7 +162,7 @@ static std::chrono::steady_clock::duration parseSessionTimeout( ErrorCodes::INVALID_SESSION_TIMEOUT); } - return std::chrono::seconds(session_timeout); + return session_timeout; } @@ -275,7 +275,7 @@ void HTTPHandler::processQuery( std::shared_ptr session; String session_id; - std::chrono::steady_clock::duration session_timeout; + uint32_t session_timeout; bool session_is_set = params.has("session_id"); const auto & config = 
server.config(); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index dbc963e0a27..aa688ff9dd5 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -124,7 +124,7 @@ public: std::shared_ptr acquireSession( const String & session_id, Context & context, - std::chrono::steady_clock::duration timeout, + uint32_t timeout, bool throw_if_not_found) { std::unique_lock lock(mutex); @@ -162,7 +162,7 @@ public: void releaseSession(NamedSession & session) { std::unique_lock lock(mutex); - scheduleCloseSession(session, lock); + close_times.emplace(time(nullptr) + session.timeout, session.key); } private: @@ -178,31 +178,11 @@ private: } }; - /// TODO it's very complicated. Make simple std::map with time_t or boost::multi_index. using Container = std::unordered_map, SessionKeyHash>; - using CloseTimes = std::deque>; + using CloseTimes = std::multimap; + Container sessions; CloseTimes close_times; - std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1); - std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); - UInt64 close_cycle = 0; - - void scheduleCloseSession(NamedSession & session, std::unique_lock &) - { - /// Push it on a queue of sessions to close, on a position corresponding to the timeout. 
- /// (timeout is measured from current moment of time) - - const UInt64 close_index = session.timeout / close_interval + 1; - const auto new_close_cycle = close_cycle + close_index; - - if (session.close_cycle != new_close_cycle) - { - session.close_cycle = new_close_cycle; - if (close_times.size() < close_index + 1) - close_times.resize(close_index + 1); - close_times[close_index].emplace_back(session.key); - } - } void cleanThread() { @@ -212,51 +192,32 @@ private: while (true) { - auto interval = closeSessions(lock); - - if (cond.wait_for(lock, interval, [this]() -> bool { return quit; })) + closeSessions(lock); + if (cond.wait_for(lock, std::chrono::seconds(1), [this]() -> bool { return quit; })) break; } } /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. - std::chrono::steady_clock::duration closeSessions(std::unique_lock & lock) + void closeSessions(std::unique_lock &) { - const auto now = std::chrono::steady_clock::now(); - - /// The time to close the next session did not come - if (now < close_cycle_time) - return close_cycle_time - now; /// Will sleep until it comes. - - const auto current_cycle = close_cycle; - - ++close_cycle; - close_cycle_time = now + close_interval; - - if (close_times.empty()) - return close_interval; - - auto & sessions_to_close = close_times.front(); - - for (const auto & key : sessions_to_close) + time_t now = time(nullptr); + for (auto it = close_times.begin(); it != close_times.end();) { - const auto session = sessions.find(key); + if (it->first >= now) + break; - if (session != sessions.end() && session->second->close_cycle <= current_cycle) - { - if (!session->second.unique()) - { - /// Skip but move it to close on the next cycle. 
- session->second->timeout = std::chrono::steady_clock::duration{0}; - scheduleCloseSession(*session->second, lock); - } - else - sessions.erase(session); - } + const auto session_it = sessions.find(it->second); + it = close_times.erase(it); + + if (session_it == sessions.end()) + continue; + + if (session_it->second.unique()) + sessions.erase(session_it); + else + close_times.emplace(now + session_it->second->timeout, session_it->second->key); /// Does not invalidate iterators. } - - close_times.pop_front(); - return close_interval; } std::mutex mutex; @@ -520,7 +481,7 @@ void Context::enableNamedSessions() shared->named_sessions.emplace(); } -std::shared_ptr Context::acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) +std::shared_ptr Context::acquireNamedSession(const String & session_id, uint32_t timeout, bool session_check) { if (!shared->named_sessions) throw Exception("Support for named sessions is not enabled", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 5b5b8bdabd5..13053e4c8f2 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -426,7 +426,7 @@ public: /// The method must be called at the server startup. void enableNamedSessions(); - std::shared_ptr acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); + std::shared_ptr acquireNamedSession(const String & session_id, uint32_t timeout, bool session_check); /// For methods below you may need to acquire a lock by yourself. 
std::unique_lock getLock() const; @@ -672,12 +672,11 @@ using NamedSessionKey = std::pair; struct NamedSession { NamedSessionKey key; - UInt64 close_cycle = 0; Context context; - std::chrono::steady_clock::duration timeout; + uint32_t timeout; NamedSessions & parent; - NamedSession(NamedSessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, NamedSessions & parent_) + NamedSession(NamedSessionKey key_, Context & context_, uint32_t timeout_, NamedSessions & parent_) : key(key_), context(context_), timeout(timeout_), parent(parent_) { } From 7dd9b057874fd8c434e00e61ba4e1d3ec0ffd482 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 03:43:17 +0300 Subject: [PATCH 158/215] Make sure that all columns are generated in performance test --- .../performance/generate_table_function.xml | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml index 4674b81af99..02dcbc493df 100644 --- a/dbms/tests/performance/generate_table_function.xml +++ b/dbms/tests/performance/generate_table_function.xml @@ -8,23 +8,23 @@ - SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 
10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i UUID', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000); - SELECT COUNT(*) FROM (SELECT * FROM generate('i String', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM 
(SELECT * FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i UUID', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i String', 10, 10, 1) LIMIT 100000); From 2fd918a162765c41431c0aadae9b807719d7ebc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 05:12:18 +0300 Subject: [PATCH 159/215] Better code --- dbms/src/Storages/StorageGenerate.cpp | 491 ------------------ dbms/src/Storages/StorageGenerateRandom.cpp | 370 
+++++++++++++ ...rageGenerate.h => StorageGenerateRandom.h} | 8 +- ...te.cpp => TableFunctionGenerateRandom.cpp} | 10 +- ...nerate.h => TableFunctionGenerateRandom.h} | 8 +- .../performance/generate_table_function.xml | 38 +- 6 files changed, 403 insertions(+), 522 deletions(-) delete mode 100644 dbms/src/Storages/StorageGenerate.cpp create mode 100644 dbms/src/Storages/StorageGenerateRandom.cpp rename dbms/src/Storages/{StorageGenerate.h => StorageGenerateRandom.h} (65%) rename dbms/src/TableFunctions/{TableFunctionGenerate.cpp => TableFunctionGenerateRandom.cpp} (82%) rename dbms/src/TableFunctions/{TableFunctionGenerate.h => TableFunctionGenerateRandom.h} (52%) diff --git a/dbms/src/Storages/StorageGenerate.cpp b/dbms/src/Storages/StorageGenerate.cpp deleted file mode 100644 index bb306d2542f..00000000000 --- a/dbms/src/Storages/StorageGenerate.cpp +++ /dev/null @@ -1,491 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int NOT_IMPLEMENTED; -extern const int LOGICAL_ERROR; -extern const int BAD_TYPE_OF_FIELD; -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - - -void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 limit, - UInt64 max_array_length, UInt64 max_string_length, pcg32 & generator, pcg64_fast & generator64) -{ - TypeIndex idx = type->getTypeId(); - - switch (idx) - { - case TypeIndex::Nothing: - throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::UInt8: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::UInt16: - { - auto & data = typeid_cast &>(column).getData(); - 
data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::UInt32: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::UInt64: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - UInt64 a = static_cast(generator64()); - data[i] = static_cast(a); - } - break; - } - case TypeIndex::UInt128: - throw Exception("There is no DataType 'UInt128' support.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Int8: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::Int16: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::Int32: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::Int64: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator64()); - } - break; - } - case TypeIndex::Int128: - throw Exception("There is no DataType 'Int128' support.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::Float32: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - double d = 1.0; - for (UInt64 i = 0; i < limit; ++i) - { - d = std::numeric_limits::max(); - data[i] = (d / pcg32::max()) * generator(); - } - break; - } - case TypeIndex::Float64: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - double d = 1.0; - for (UInt64 i = 0; i < limit; ++i) - { - d = 
std::numeric_limits::max(); - data[i] = (d / pcg64::max()) * generator64(); - } - break; - } - case TypeIndex::Date: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::DateTime: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::DateTime64: - { - UInt32 scale; - if (auto * ptype = typeid_cast(type.get())) - scale = ptype->getScale(); - else - throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD); - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - UInt32 fractional = static_cast(generator()) % intExp10(scale); - UInt32 whole = static_cast(generator()); - DateTime64 dt = DecimalUtils::decimalFromComponents(whole, fractional, scale); - data[i] = dt; - } - break; - } - case TypeIndex::String: - { - auto & column_string = typeid_cast(column); - auto & offsets = column_string.getOffsets(); - auto & chars = column_string.getChars(); - - UInt64 offset = 0; - { - offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - offset += 1 + static_cast(generator()) % max_string_length; - offsets[i] = offset; - } - chars.resize(offset); - for (UInt64 i = 0; i < offset; ++i) - { - if (offset - i > 5) - { - UInt32 r = generator(); - chars[i] = 32 + (r & 0x7F) % 95; - chars[i + 1] = 32 + ((r >> 7) & 0x7F) % 95; - chars[i + 2] = 32 + ((r >> 14) & 0x7F) % 95; - chars[i + 3] = 32 + ((r >> 21) & 0x7F) % 95; - chars[i + 4] = 32 + (r >> 28); - i += 4; - } - else - { - UInt32 r = generator(); - chars[i] = 32 + (r % 95); - } - } - // add terminating zero char - for (auto & i : offsets) - { - chars[i - 1] = 0; - } - } - break; - } - case TypeIndex::FixedString: - { - auto & column_string = typeid_cast(column); - 
const size_t len = column_string.sizeOfValueIfFixed(); - auto & chars = column_string.getChars(); - - UInt64 num_chars = static_cast(len) * limit; - { - chars.resize(num_chars); - for (UInt64 i = 0; i < num_chars; ++i) - { - chars[i] = static_cast(generator()); - } - } - break; - } - case TypeIndex::Enum8: - { - auto values = typeid_cast *>(type.get())->getValues(); - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - - UInt8 size = values.size(); - UInt8 off; - for (UInt64 i = 0; i < limit; ++i) - { - off = static_cast(generator()) % size; - data[i] = values[off].second; - } - break; - } - case TypeIndex::Enum16: - { - auto values = typeid_cast *>(type.get())->getValues(); - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - - UInt16 size = values.size(); - UInt8 off; - for (UInt64 i = 0; i < limit; ++i) - { - off = static_cast(generator()) % size; - data[i] = values[off].second; - } - break; - } - case TypeIndex::Decimal32: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - data[i] = static_cast(generator()); - } - break; - } - case TypeIndex::Decimal64: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - UInt64 a = static_cast(generator()) << 32 | static_cast(generator()); - data[i] = a; - } - break; - } - case TypeIndex::Decimal128: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - Int128 x = static_cast(generator64()) << 64 | static_cast(generator64()); - data[i] = x; - } - break; - } - case TypeIndex::UUID: - { - auto & data = typeid_cast &>(column).getData(); - data.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - UInt64 a = static_cast(generator64()); - UInt64 b = static_cast(generator64()); - auto x = UInt128(a, b); - data[i] = x; - } - break; - } - case TypeIndex::Array: - { - auto & column_array = 
typeid_cast(column); - auto nested_type = typeid_cast(type.get())->getNestedType(); - - auto & offsets = column_array.getOffsets(); - IColumn & data = column_array.getData(); - - UInt64 offset = 0; - { - offsets.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - offset += static_cast(generator()) % max_array_length; - offsets[i] = offset; - } - } - fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, generator, generator64); - break; - } - case TypeIndex::Tuple: - { - auto &column_tuple = typeid_cast(column); - auto elements = typeid_cast(type.get())->getElements(); - - for (size_t i = 0; i < column_tuple.tupleSize(); ++i) - { - fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, generator, generator64); - } - break; - } - case TypeIndex::Set: - throw Exception("Type 'Set' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); - case TypeIndex::Interval: - throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); - case TypeIndex::Nullable: - { - auto & column_nullable = typeid_cast(column); - auto nested_type = typeid_cast(type.get())->getNestedType(); - - auto & null_map = column_nullable.getNullMapData(); - IColumn & nested_column = column_nullable.getNestedColumn(); - - fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, generator, generator64); - - null_map.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - { - null_map[i] = generator() < 1024; /// No real motivation for this. 
- } - break; - } - case TypeIndex::Function: - throw Exception("Type 'Function' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR); - case TypeIndex::AggregateFunction: - throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED); - case TypeIndex::LowCardinality: - throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED); - } -} - -StorageGenerate::StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, - UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_) - : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) -{ - random_seed = random_seed_ ? random_seed_ : randomSeed(); - setColumns(columns_); -} - - -class GenerateSource : public SourceWithProgress -{ -public: - GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_) - : SourceWithProgress(block_header_), block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) - , block_header(block_header_), r32(random_seed_), r64(random_seed_) {} - - String getName() const override { return "Generate"; } - -protected: - Chunk generate() override - { - auto columns = block_header.cloneEmptyColumns(); - DataTypes types = block_header.getDataTypes(); - auto cur_type = types.cbegin(); - for (auto & col : columns) - { - fillColumnWithRandomData(col->assumeMutableRef(), *cur_type, block_size, max_array_length, max_string_length, r32, r64); - ++cur_type; - } - return {std::move(columns), block_size}; - } - -private: - UInt64 block_size; - UInt64 max_array_length; - UInt64 max_string_length; - Block block_header; - - pcg32 r32; - pcg64_fast r64; - -}; - - -void registerStorageGenerate(StorageFactory & factory) -{ - factory.registerStorage("Generate", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args 
= args.engine_args; - - if (engine_args.size() > 3) - throw Exception("Storage Generate requires at most three arguments: "\ - "max_array_length, max_string_length, random_seed.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - UInt64 max_array_length_ = 10; - UInt64 max_string_length_ = 10; - UInt64 random_seed_ = 0; // zero for random - - /// Parsing second argument if present - if (engine_args.size() >= 1) - max_array_length_ = engine_args[0]->as().value.safeGet(); - - if (engine_args.size() >= 2) - max_string_length_ = engine_args[1]->as().value.safeGet(); - - if (engine_args.size() == 3) - random_seed_ = engine_args[2]->as().value.safeGet(); - - return StorageGenerate::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_); - }); -} - -Pipes StorageGenerate::read( - const Names & column_names, - const SelectQueryInfo & /*query_info*/, - const Context & /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - unsigned num_streams) -{ - check(column_names, true); - - Pipes pipes; - pipes.reserve(num_streams); - - const ColumnsDescription & columns_ = getColumns(); - Block block_header; - for (const auto & name : column_names) - { - const auto & name_type = columns_.get(name); - MutableColumnPtr column = name_type.type->createColumn(); - block_header.insert({std::move(column), name_type.type, name_type.name}); - } - - pcg32 generate(random_seed); - for (UInt64 i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header)); - } - return pipes; -} - -} diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp new file mode 100644 index 00000000000..d77c9f336e9 --- /dev/null +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -0,0 +1,370 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +namespace +{ + +void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64_fast & rng) +{ + char * __restrict end = data + size; + while (data < end) + { + /// The loop can be further optimized. + UInt64 number = rng(); + unalignedStore(data, number); + data += sizeof(UInt64); /// We assume that data has 15-byte padding (see PaddedPODArray) + } +} + + +ColumnPtr fillColumnWithRandomData( + const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64_fast & rng, const Context & context) +{ + TypeIndex idx = type->getTypeId(); + + switch (idx) + { + case TypeIndex::String: + { + Block block{ColumnWithTypeAndName{nullptr, type, "result"}}; + FunctionFactory::instance().get("randomPrintableASCII", context)->build({})->execute(block, {}, 0, limit); + return block.getByPosition(0).column; + } + + case TypeIndex::Enum8: + { + auto column = ColumnVector::create(); + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = column->getData(); + data.resize(limit); + + UInt8 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(rng()) % size; + data[i] = values[off].second; + } + + return column; + } + + case TypeIndex::Enum16: + { + auto column = ColumnVector::create(); + auto values = typeid_cast *>(type.get())->getValues(); + auto & data = column->getData(); + data.resize(limit); + + UInt16 size = values.size(); + UInt8 off; + for (UInt64 i = 0; i < limit; ++i) + { + off = static_cast(rng()) % size; + data[i] = values[off].second; + } + + return column; + } + + case TypeIndex::Array: + { + auto nested_type = typeid_cast(type.get())->getNestedType(); + + auto offsets_column = ColumnVector::create(); + auto 
& offsets = offsets_column->getData(); + + UInt64 offset = 0; + offsets.resize(limit); + for (UInt64 i = 0; i < limit; ++i) + { + offset += static_cast(rng()) % max_array_length; + offsets[i] = offset; + } + + auto data_column = fillColumnWithRandomData(nested_type, offset, max_array_length, max_string_length, rng, context); + + return ColumnArray::create(std::move(data_column), std::move(offsets_column)); + } + + case TypeIndex::Tuple: + { + auto elements = typeid_cast(type.get())->getElements(); + const size_t tuple_size = elements.size(); + Columns tuple_columns(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = fillColumnWithRandomData(elements[i], limit, max_array_length, max_string_length, rng, context); + + return ColumnTuple::create(std::move(tuple_columns)); + } + + case TypeIndex::Nullable: + { + auto nested_type = typeid_cast(type.get())->getNestedType(); + auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context); + + auto null_map_column = ColumnUInt8::create(); + auto & null_map = null_map_column->getData(); + null_map.resize(limit); + for (UInt64 i = 0; i < limit; ++i) + null_map[i] = rng() % 16 == 0; /// No real motivation for this. 
+ + return ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + } + + case TypeIndex::UInt8: + { + auto column = ColumnUInt8::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt8), rng); + return column; + } + case TypeIndex::UInt16: [[fallthrough]]; + case TypeIndex::Date: + { + auto column = ColumnUInt16::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt16), rng); + return column; + } + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::DateTime: + { + auto column = ColumnUInt32::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt32), rng); + return column; + } + case TypeIndex::UInt64: + { + auto column = ColumnUInt64::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt64), rng); + return column; + } + case TypeIndex::UInt128: [[fallthrough]]; + case TypeIndex::UUID: + { + auto column = ColumnUInt128::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt128), rng); + return column; + } + case TypeIndex::Int8: + { + auto column = ColumnInt8::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Int8), rng); + return column; + } + case TypeIndex::Int16: + { + auto column = ColumnInt16::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Int16), rng); + return column; + } + case TypeIndex::Int32: + { + auto column = ColumnInt32::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Int32), 
rng); + return column; + } + case TypeIndex::Int64: + { + auto column = ColumnInt64::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Int64), rng); + return column; + } + case TypeIndex::Float32: + { + auto column = ColumnFloat32::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Float32), rng); + return column; + } + case TypeIndex::Float64: + { + auto column = ColumnFloat64::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(Float64), rng); + return column; + } + case TypeIndex::Decimal32: + { + auto column = type->createColumn(); + auto & column_concrete = typeid_cast &>(*column); + column_concrete.getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal32), rng); + return column; + } + case TypeIndex::Decimal64: [[fallthrough]]; + case TypeIndex::DateTime64: + { + auto column = type->createColumn(); + auto & column_concrete = typeid_cast &>(*column); + column_concrete.getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal64), rng); + return column; + } + case TypeIndex::Decimal128: + { + auto column = type->createColumn(); + auto & column_concrete = typeid_cast &>(*column); + column_concrete.getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal128), rng); + return column; + } + + default: + throw Exception("The 'GenerateRandom' is not implemented for type " + type->getName(), ErrorCodes::NOT_IMPLEMENTED); + } +} + + +class GenerateSource : public SourceWithProgress +{ +public: + GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, const 
Context & context_) + : SourceWithProgress(block_header_), block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) + , block_header(block_header_), rng(random_seed_), context(context_) {} + + String getName() const override { return "GenerateRandom"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + DataTypes types = block_header.getDataTypes(); + + for (const auto & type : types) + columns.emplace_back(fillColumnWithRandomData(type, block_size, max_array_length, max_string_length, rng, context)); + + return {std::move(columns), block_size}; + } + +private: + UInt64 block_size; + UInt64 max_array_length; + UInt64 max_string_length; + Block block_header; + + pcg64_fast rng; + + const Context & context; +}; + +} + + +StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_, + UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_) + : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) +{ + random_seed = random_seed_ ? 
random_seed_ : randomSeed(); + setColumns(columns_); +} + + +void registerStorageGenerateRandom(StorageFactory & factory) +{ + factory.registerStorage("GenerateRandom", [](const StorageFactory::Arguments & args) + { + ASTs & engine_args = args.engine_args; + + if (engine_args.size() > 3) + throw Exception("Storage GenerateRandom requires at most three arguments: "\ + "max_array_length, max_string_length, random_seed.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + UInt64 max_array_length_ = 10; + UInt64 max_string_length_ = 10; + UInt64 random_seed_ = 0; // zero for random + + /// Parsing second argument if present + if (engine_args.size() >= 1) + max_array_length_ = engine_args[0]->as().value.safeGet(); + + if (engine_args.size() >= 2) + max_string_length_ = engine_args[1]->as().value.safeGet(); + + if (engine_args.size() == 3) + random_seed_ = engine_args[2]->as().value.safeGet(); + + return StorageGenerateRandom::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_); + }); +} + +Pipes StorageGenerateRandom::read( + const Names & column_names, + const SelectQueryInfo & /*query_info*/, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned num_streams) +{ + check(column_names, true); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & columns_ = getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = columns_.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + /// Will create more seed values for each source from initial seed. 
+ pcg64_fast generate(random_seed); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context)); + + return pipes; +} + +} diff --git a/dbms/src/Storages/StorageGenerate.h b/dbms/src/Storages/StorageGenerateRandom.h similarity index 65% rename from dbms/src/Storages/StorageGenerate.h rename to dbms/src/Storages/StorageGenerateRandom.h index 4bb955bbabe..7622099dcbb 100644 --- a/dbms/src/Storages/StorageGenerate.h +++ b/dbms/src/Storages/StorageGenerateRandom.h @@ -8,11 +8,11 @@ namespace DB { /* Generates random data for given schema. */ -class StorageGenerate : public ext::shared_ptr_helper, public IStorage +class StorageGenerateRandom : public ext::shared_ptr_helper, public IStorage { - friend struct ext::shared_ptr_helper; + friend struct ext::shared_ptr_helper; public: - std::string getName() const override { return "Generate"; } + std::string getName() const override { return "GenerateRandom"; } Pipes read( const Names & column_names, @@ -28,7 +28,7 @@ private: UInt64 random_seed = 0; protected: - StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_, + StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_, UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed); }; diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.cpp b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp similarity index 82% rename from dbms/src/TableFunctions/TableFunctionGenerate.cpp rename to dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp index c68206c3d51..327e941508a 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include "registerTableFunctions.h" @@ -25,7 +25,7 @@ 
namespace ErrorCodes extern const int LOGICAL_ERROR; } -StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const +StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const { ASTs & args_func = ast_function->children; @@ -63,14 +63,14 @@ StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const ColumnsDescription columns = parseColumnsListFromString(structure, context); - auto res = StorageGenerate::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed); + auto res = StorageGenerateRandom::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed); res->startup(); return res; } void registerTableFunctionGenerate(TableFunctionFactory & factory) { - factory.registerFunction(TableFunctionFactory::CaseInsensitive); + factory.registerFunction(); } } diff --git a/dbms/src/TableFunctions/TableFunctionGenerate.h b/dbms/src/TableFunctions/TableFunctionGenerateRandom.h similarity index 52% rename from dbms/src/TableFunctions/TableFunctionGenerate.h rename to dbms/src/TableFunctions/TableFunctionGenerateRandom.h index e4c7f9beeba..042a5c59dbe 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerate.h +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.h @@ -4,12 +4,14 @@ namespace DB { -/* generate(structure, [max_array_length, max_string_length, random_seed]) - creates a temporary storage that generates columns with random data + +/* generateRandom(structure, [max_array_length, max_string_length, random_seed]) + * - creates a temporary storage that generates columns with random data */ -class TableFunctionGenerate : public ITableFunction +class TableFunctionGenerateRandom : public ITableFunction { public: - static constexpr auto name = "generate"; + static constexpr auto name = 
"generateRandom"; std::string getName() const override { return name; } private: StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override; diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml index 02dcbc493df..b703424ba49 100644 --- a/dbms/tests/performance/generate_table_function.xml +++ b/dbms/tests/performance/generate_table_function.xml @@ -8,23 +8,23 @@ - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 
100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i UUID', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generate('i String', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM 
generateRandom('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 10, 10, 1) LIMIT 100000); From e30173f54be89c709a4cf10c31180397f9d81de9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 05:19:28 +0300 Subject: [PATCH 160/215] Fixed build --- dbms/src/Storages/registerStorages.cpp | 2 +- dbms/src/Storages/registerStorages.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index e9601577f1d..f5fab52285d 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -29,7 +29,7 @@ void registerStorages() registerStorageView(factory); registerStorageMaterializedView(factory); registerStorageLiveView(factory); 
- registerStorageGenerate(factory); + registerStorageGenerateRandom(factory); #if USE_AWS_S3 registerStorageS3(factory); diff --git a/dbms/src/Storages/registerStorages.h b/dbms/src/Storages/registerStorages.h index ed00225104f..63a758f5b38 100644 --- a/dbms/src/Storages/registerStorages.h +++ b/dbms/src/Storages/registerStorages.h @@ -23,7 +23,7 @@ void registerStorageJoin(StorageFactory & factory); void registerStorageView(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); void registerStorageLiveView(StorageFactory & factory); -void registerStorageGenerate(StorageFactory & factory); +void registerStorageGenerateRandom(StorageFactory & factory); #if USE_AWS_S3 void registerStorageS3(StorageFactory & factory); From d893eace1723a28597d81700f707927ca9765173 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 05:19:40 +0300 Subject: [PATCH 161/215] Update tests --- .../0_stateless/01087_storage_generate.sql | 4 +- .../01087_table_function_generate.sql | 76 +++++++++---------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.sql b/dbms/tests/queries/0_stateless/01087_storage_generate.sql index 46d49dc165f..54ecd3007a9 100644 --- a/dbms/tests/queries/0_stateless/01087_storage_generate.sql +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS test_table; -CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Generate(); +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(); SELECT COUNT(*) FROM (SELECT * FROM test_table LIMIT 100); DROP TABLE IF EXISTS test_table; @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Generate(3, 5, 10); +CREATE TABLE test_table_2(a 
Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(3, 5, 10); SELECT * FROM test_table_2 LIMIT 100; diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.sql b/dbms/tests/queries/0_stateless/01087_table_function_generate.sql index 0329fa81bf1..6891dd94520 100644 --- a/dbms/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.sql @@ -3,173 +3,173 @@ SELECT toTypeName(ui32), toTypeName(i32), toTypeName(ui16), toTypeName(i16), toTypeName(ui8), toTypeName(i8) -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') +FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 1; SELECT ui64, i64, ui32, i32, ui16, i16, ui8, i8 -FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) +FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)') +FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)') LIMIT 1; SELECT i -FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) +FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))') +FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))') LIMIT 1; SELECT i -FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i)s -FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))') +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , 
\'o\' = -200)))') LIMIT 1; SELECT i -FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(d), toTypeName(dt), toTypeName(dtm) -FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')') +FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')') LIMIT 1; SELECT d, dt, dtm -FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) +FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) -FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')') +FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')') LIMIT 1; SELECT dt64, dts64, dtms64 -FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) +FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(f32), toTypeName(f64) -FROM generate('f32 Float32, f64 Float64') +FROM generateRandom('f32 Float32, f64 Float64') LIMIT 1; SELECT f32, f64 -FROM generate('f32 Float32, f64 Float64', 10, 10, 1) +FROM generateRandom('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(d32), toTypeName(d64), toTypeName(d64) -FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)') +FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)') LIMIT 1; SELECT d32, d64, d128 -FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) +FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Tuple(Int32, Int64)') +FROM 
generateRandom('i Tuple(Int32, Int64)') LIMIT 1; SELECT i -FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) +FROM generateRandom('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Int8)') +FROM generateRandom('i Array(Int8)') LIMIT 1; SELECT i -FROM generate('i Array(Int8)', 10, 10, 1) +FROM generateRandom('i Array(Int8)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(Int32))') +FROM generateRandom('i Array(Nullable(Int32))') LIMIT 1; SELECT i -FROM generate('i Array(Nullable(Int32))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Tuple(Int32, Array(Int64))') +FROM generateRandom('i Tuple(Int32, Array(Int64))') LIMIT 1; SELECT i -FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) +FROM generateRandom('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Nullable(String)', 1) +FROM generateRandom('i Nullable(String)', 1) LIMIT 1; SELECT i -FROM generate('i Nullable(String)', 10, 10, 1) +FROM generateRandom('i Nullable(String)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(String)') +FROM generateRandom('i Array(String)') LIMIT 1; SELECT i -FROM generate('i Array(String)', 10, 10, 1) +FROM generateRandom('i Array(String)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i UUID') +FROM generateRandom('i UUID') LIMIT 1; SELECT i -FROM generate('i UUID', 10, 10, 1) +FROM generateRandom('i UUID', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i Array(Nullable(UUID))') +FROM generateRandom('i Array(Nullable(UUID))') LIMIT 1; SELECT i -FROM generate('i Array(Nullable(UUID))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i FixedString(4)') +FROM generateRandom('i FixedString(4)') 
LIMIT 1; SELECT i -FROM generate('i FixedString(4)', 10, 10, 1) +FROM generateRandom('i FixedString(4)', 10, 10, 1) LIMIT 10; SELECT '-'; SELECT toTypeName(i) -FROM generate('i String') +FROM generateRandom('i String') LIMIT 1; SELECT i -FROM generate('i String', 10, 10, 1) +FROM generateRandom('i String', 10, 10, 1) LIMIT 10; SELECT '-'; DROP TABLE IF EXISTS test_table; CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Memory; -INSERT INTO test_table SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 2, 10, 1) +INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 2, 10, 1) LIMIT 10; SELECT * FROM test_table; @@ -180,7 +180,7 @@ SELECT '-'; DROP TABLE IF EXISTS test_table_2; CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)) ENGINE=Memory; -INSERT INTO test_table_2 SELECT * FROM generate('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 3, 5, 10) +INSERT INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 3, 5, 10) LIMIT 10; SELECT * FROM test_table_2; From ca24156be9d46a86e37c904a07cee40c1aa319e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 05:19:55 +0300 Subject: [PATCH 162/215] Updated docs --- docs/en/operations/table_engines/generate.md | 8 ++++---- docs/en/query_language/table_functions/generate.md | 6 +++--- docs/toc_en.yml | 3 ++- docs/toc_fa.yml | 3 ++- docs/toc_ja.yml | 3 ++- docs/toc_ru.yml | 3 ++- docs/toc_zh.yml 
| 3 ++- 7 files changed, 17 insertions(+), 12 deletions(-) diff --git a/docs/en/operations/table_engines/generate.md b/docs/en/operations/table_engines/generate.md index 126acb05626..bdf52f84ac1 100644 --- a/docs/en/operations/table_engines/generate.md +++ b/docs/en/operations/table_engines/generate.md @@ -1,6 +1,6 @@ -# Generate {#table_engines-generate} +# GenerateRandom {#table_engines-generate} -The Generate table engine produces random data for given table schema. +The GenerateRandom table engine produces random data for given table schema. Usage examples: @@ -10,7 +10,7 @@ Usage examples: ## Usage in ClickHouse Server ```sql -Generate(max_array_length, max_string_length, random_seed) +ENGINE = GenerateRandom(max_array_length, max_string_length, random_seed) ``` The `max_array_length` and `max_string_length` parameters specify maximum length of all @@ -25,7 +25,7 @@ It supports all [DataTypes](../../data_types/index.md) that can be stored in a t **1.** Set up the `generate_engine_table` table: ```sql -CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE=Generate(3, 5, 1) +CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE=GenerateRandom(3, 5, 1) ``` **2.** Query the data: diff --git a/docs/en/query_language/table_functions/generate.md b/docs/en/query_language/table_functions/generate.md index ed9e2150b03..2f43bf453db 100644 --- a/docs/en/query_language/table_functions/generate.md +++ b/docs/en/query_language/table_functions/generate.md @@ -1,11 +1,11 @@ -# generate +# generateRandom Generates random data with given schema. Allows to populate test tables with data. Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`. 
```sql -generate('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); +generateRandom('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); ``` **Parameters** @@ -25,7 +25,7 @@ A table object with requested schema. ```sql -SELECT * FROM generate('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); +SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); ``` ```text ┌─a────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ diff --git a/docs/toc_en.yml b/docs/toc_en.yml index fc8b9b99ee8..60ef9655e17 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -75,6 +75,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'GenerateRandom': 'operations/table_engines/generate.md' - 'Database Engines': - 'Introduction': 'database_engines/index.md' @@ -143,7 +144,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'generate': 'query_language/table_functions/generate.md' + - 'generateRandom': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index 0e5604d1521..d1fde93fcdb 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -109,6 +109,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'GenerateRandom': 'operations/table_engines/generate.md' - 'SQL Reference': - 'hidden': 'query_language/index.md' @@ -170,7 +171,7 @@ nav: - 'odbc': 
'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'generate': 'query_language/table_functions/generate.md' + - 'generateRandom': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index fea1f8780ce..319a1a140a6 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -79,6 +79,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'GenerateRandom': 'operations/table_engines/generate.md' - 'SQL Reference': - 'hidden': 'query_language/index.md' @@ -142,7 +143,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'generate': 'query_language/table_functions/generate.md' + - 'generateRandom': 'query_language/table_functions/generate.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 99af6d02545..b2327515250 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -80,6 +80,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'GenerateRandom': 'operations/table_engines/generate.md' - 'Справка по SQL': - 'hidden': 'query_language/index.md' @@ -143,7 +144,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'generate': 'query_language/table_functions/generate.md' + - 'generateRandom': 'query_language/table_functions/generate.md' - 'Словари': - 'Введение': 
'query_language/dicts/index.md' - 'Внешние словари': diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index bd5c4308bce..3d8fd5004f7 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -109,6 +109,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'GenerateRandom': 'operations/table_engines/generate.md' - 'SQL语法': - 'hidden': 'query_language/index.md' @@ -170,7 +171,7 @@ nav: - 'odbc': 'query_language/table_functions/odbc.md' - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - - 'generate': 'query_language/table_functions/generate.md' + - 'generateRandom': 'query_language/table_functions/generate.md' - '字典': - '介绍': 'query_language/dicts/index.md' - '外部字典': From 12123390236677ffaf0e63de4f3306b046a64b59 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 05:34:35 +0300 Subject: [PATCH 163/215] Fixed errors --- dbms/src/Storages/StorageGenerateRandom.cpp | 36 ++++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index d77c9f336e9..15634a9ae76 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -63,9 +63,23 @@ ColumnPtr fillColumnWithRandomData( { case TypeIndex::String: { - Block block{ColumnWithTypeAndName{nullptr, type, "result"}}; - FunctionFactory::instance().get("randomPrintableASCII", context)->build({})->execute(block, {}, 0, limit); - return block.getByPosition(0).column; + auto size_column = ColumnUInt32::create(); + auto & sizes = size_column->getData(); + + sizes.resize(limit); + for (UInt64 i = 0; i < limit; ++i) + sizes[i] = static_cast(rng()) % max_string_length; /// Slow + + ColumnWithTypeAndName argument{std::move(size_column), std::make_shared(), "size"}; + + Block block + { + 
argument, + {nullptr, type, "result"} + }; + + FunctionFactory::instance().get("randomPrintableASCII", context)->build({argument})->execute(block, {0}, 1, limit); + return block.getByPosition(1).column; } case TypeIndex::Enum8: @@ -238,8 +252,7 @@ ColumnPtr fillColumnWithRandomData( fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal32), rng); return column; } - case TypeIndex::Decimal64: [[fallthrough]]; - case TypeIndex::DateTime64: + case TypeIndex::Decimal64: { auto column = type->createColumn(); auto & column_concrete = typeid_cast &>(*column); @@ -255,6 +268,19 @@ ColumnPtr fillColumnWithRandomData( fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal128), rng); return column; } + case TypeIndex::DateTime64: + { + auto column = type->createColumn(); + auto & column_concrete = typeid_cast &>(*column); + column_concrete.getData().resize(limit); + + UInt64 range = (1ULL << 32) * intExp10(typeid_cast(*type).getScale()); + + for (size_t i = 0; i < limit; ++i) + column_concrete.getData()[i] = rng() % range; /// Slow + + return column; + } default: throw Exception("The 'GenerateRandom' is not implemented for type " + type->getName(), ErrorCodes::NOT_IMPLEMENTED); From 12596256aca452ad6eda580a428f0adf8bf308a6 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 6 Mar 2020 16:31:50 +0800 Subject: [PATCH 164/215] Fixed test cases of parsing/formatting DateTime64 --- .../gtest_DateTime64_parsing_and_writing.cpp | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/dbms/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp b/dbms/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp index 04fdb6f4a34..fd94c9c9eee 100644 --- a/dbms/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp +++ b/dbms/src/IO/tests/gtest_DateTime64_parsing_and_writing.cpp @@ -18,7 +18,7 @@ struct DateTime64StringsTestParam const std::string_view string; DateTime64 
dt64; UInt32 scale; - const DateLUTImpl & timezone = DateLUT::instance(); + const DateLUTImpl & timezone; }; static std::ostream & operator << (std::ostream & ostr, const DateTime64StringsTestParam & param) @@ -38,7 +38,7 @@ TEST_P(DateTime64StringParseTest, readDateTime64Text) ReadBufferFromMemory read_buffer(param.string.data(), param.string.size()); DateTime64 actual; - EXPECT_TRUE(tryReadDateTime64Text(actual, param.scale, read_buffer)); + EXPECT_TRUE(tryReadDateTime64Text(actual, param.scale, read_buffer, param.timezone)); EXPECT_EQ(param.dt64, actual); } @@ -61,7 +61,7 @@ TEST_P(DateTime64StringWriteTest, WriteText) PaddedPODArray actual_string(param.string.size() * 2, '\0'); // TODO: detect overflows WriteBuffer write_buffer(actual_string.data(), actual_string.size()); - EXPECT_NO_THROW(writeDateTimeText(param.dt64, param.scale, write_buffer)); + EXPECT_NO_THROW(writeDateTimeText(param.dt64, param.scale, write_buffer, param.timezone)); EXPECT_STREQ(param.string.data(), actual_string.data()); } @@ -86,49 +86,57 @@ INSTANTIATE_TEST_SUITE_P(Basic, "When subsecond part is missing from string it is set to zero.", "2019-09-16 19:20:17", 1568650817'000, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "When subsecond part is present in string, but it is zero, it is set to zero.", "2019-09-16 19:20:17.0", 1568650817'000, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "When scale is 0, subsecond part is not set.", "2019-09-16 19:20:17", 1568650817ULL, - 0 + 0, + DateLUT::instance("Europe/Minsk") }, { "When scale is 0, subsecond part is 0 despite beeing present in string.", "2019-09-16 19:20:17.123", 1568650817ULL, - 0 + 0, + DateLUT::instance("Europe/Minsk") }, { "When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.", "2019-09-16 19:20:17.123", 1568650817'123, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 
3.", "2019-09-16 19:20:17.012", 1568650817'012, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).", "2019-09-16 19:20:17.123", 1568650817'12300ULL, - 5 + 5, + DateLUT::instance("Europe/Minsk") }, { "When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.", "2019-09-16 19:20:17.123", 1568650817'1ULL, - 1 + 1, + DateLUT::instance("Europe/Minsk") } }) ); @@ -137,10 +145,11 @@ INSTANTIATE_TEST_SUITE_P(BestEffort, DateTime64StringParseBestEffortTest, ::testing::ValuesIn(std::initializer_list{ { - "When subsecond part is unreasonably large, it fals to parse", + "When subsecond part is unreasonably large, it truncated to given scale", "2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364", 1568650817'123456ULL, - 6 + 6, + DateLUT::instance("Europe/Minsk") } }) ); @@ -155,31 +164,36 @@ INSTANTIATE_TEST_SUITE_P(Basic, "non-zero subsecond part on DateTime64 with scale of 3", "2019-09-16 19:20:17.123", 1568650817'123, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "non-zero subsecond part on DateTime64 with scale of 5", "2019-09-16 19:20:17.12345", 1568650817'12345ULL, - 5 + 5, + DateLUT::instance("Europe/Minsk") }, { "Zero subsecond part is written to string", "2019-09-16 19:20:17.000", 1568650817'000ULL, - 3 + 3, + DateLUT::instance("Europe/Minsk") }, { "When scale is 0, subsecond part (and separtor) is missing from string", "2019-09-16 19:20:17", 1568650817ULL, - 0 + 0, + DateLUT::instance("Europe/Minsk") }, { "Subsecond part with leading zeroes is written to string correctly", "2019-09-16 19:20:17.001", 1568650817'001ULL, - 3 + 3, + DateLUT::instance("Europe/Minsk") } }) ); From 36de225bb2803e5e9a89446286f91b703d53561b Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Fri, 6 Mar 2020 13:50:43 +0300 Subject: [PATCH 
165/215] CLICKHOUSEDOCS-548: Adopters --- docs/en/introduction/adopters.md | 177 ++++--------------------------- 1 file changed, 22 insertions(+), 155 deletions(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index eafb9d6d0fb..e0d409d9cfc 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -6,9 +6,12 @@ | Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size* | Reference | | --- | --- | --- | --- | --- | --- | | [2gis](https://2gis.ru) | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| [Aloha Browser](https://alohabrowser.com/) | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) | | [Amadeus](https://amadeus.com/) | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | | [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| [ArenaData](https://arenadata.tech/) | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | | [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | +| [Benocs](https://www.benocs.com/) | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 
2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | | [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | | `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | @@ -20,190 +23,54 @@ | [ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | | [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019 ](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| [Criteo/Storetail] | Retail | Main product | — | — | [Slides in English, October 2018 ](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| [Deutsche Bank](db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| [CraiditX 氪信](https://creditx.com) | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| [Criteo/Storetail](https://www.criteo.com/) | Retail | Main product | — | — | [Slides in English, October 2018 
](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| [Deutsche Bank](https://db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| [Diva-e](https://www.diva-e.com) | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | [Exness](https://www.exness.com) | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | [HUYA](https://www.huya.com/) | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Idealista](https://www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Infovista](https://www.infovista.com/) | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| [InnoGames](https://www.innogames.com) | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| [Integros](https://integros.com) | Platform for video services | Analytics | — | — | [Slides in Russian, May 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | | [LifeStreet](https://cloudflare.com) | Ad network | Main product | 60 servers in 3 replicas | 2-2.5 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | | [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | | [OneAPM](https://www.oneapm.com/) | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | | [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | [QINGCLOUD](https://www.qingcloud.com/) | Cloud 
services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| [Rambler](rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| [Percent百分点](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| [Rambler](https://rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | [Traffic Stars](https://trafficstars.com/) | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | | [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | | [SEMrush](https://www.semrush.com/) | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | | [scireum GmbH](https://www.scireum.de/) | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | | [Sentry](https://sentry.io/) | Software developer | Backend for product | — | — | [Blog Post in English, May 
2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| [seo.do](https://seo.do/) | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | | [Sina](http://english.sina.com/index.html) | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | | [SMI2](https://smi2.ru/) | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | | [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | | [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | | [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | - Нераспознанный китайский источник https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf | [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/ml.pdf) | -| [VKontakte](vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| [VKontakte](https://vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| [Wisebits](https://wisebits.com/) | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| [Ximalaya](https://www.ximalaya.com/) | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | | [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | | [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | | [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | | [Yandex Metrica](https://metrica.yandex.com) | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | | [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| --- | --- | --- | --- | --- | --- | -| --- | --- | 
--- | --- | --- | --- | +| [МКБ](https://mkb.ru/) | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| [金数据](https://jinshuju.net) | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | - -### Not checked mentions - -- Bioinformatics - evolutionary genetics: -https://github.com/msestak/FindOrigin - -"We are exploring evolution of novel genes in genomes because if seems that genomes are far from being static as previously believed and what actually happens is that new genes are constantly being added and old genes are lost." - -- Search engine and analytics for Bitcoin transactions: -https://blockchair.com/ - -"We have quite large tables on just single server and everything works really fast &mdash with any filters and sorting everything is processed just instantly." - -- https://www.octonica.ru/ презентация https://github.com/ClickHouse/clickhouse-presentations/blob/master/database_saturday_2018_2/octonica/meetup.pptx - - -Yandex.Mail is using ClickHouse to record user activity logs in a web interface for investigations and analytics. - -Yandex.Browser is using ClickHouse for performance histograms. Browsers send many mini-histograms from clients. -They are all stored into ClickHouse for the purpose of Browser version comparisons, analytics and investigations. - -- Infinidat https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/clickhouse_in_the_world.pptx -- Vertamedia -- NVidia -- Percona https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/clickhouse_in_the_world.pptx Но это не компания, это разработчик тулзов для OpenSource. 
https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup14/analyzing_mysql_logs.pptx -- BI Tableau https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/tableau.pptx December 2017? это дата загрузки презентации. -- https://my.com/ @mail.ru https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup11/target.pdf -- [Эрливидео](https://flussonic.ru/) https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/erlyvideo.key -- mipt https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/mipt.pdf - -### Stash - -- [СБИС, Тензор](https://sbis.ru/about), Analytics. В своих вакансиях регулярно указывают ClickHouse + есть задание для студентов 2018 https://tensor.ru/stipend2018/projects Active Manager - Анализатор активности на странице, где подразумевается использование ClickHouse. - -- Компания Republer https://republer.com/ О том, что они используют ClickHouse видно из вакансии https://telegram.me/nodejs_jobs_feed/607 - -- Kaspersky заявлен в нескольких презентациях, Однако живых публичных источников найти не получается. Поисковики приводят на сайт вакансий Касперского, но в явном виде ClickHouse там нигде не появляется. Есть https://careers.kaspersky.ru/tech/, поиск, на котором приводит на вакансию девопса, при этом в описании вакансии ClickHouse не упоминается. Вот такая штука есть https://sudonull.com/post/76060-The-second-mitap-of-the-Rust-community-in-Kaspersky-Lab-Kaspersky-Labs-blog - -- Мегафон. На сайтах вакансий от него есть несколько вакансий с ClickHouse типа https://ekaterinburg.hh.ru/vacancy/35891497?utm_source=jooble.org&utm_medium=meta&utm_campaign=RU_paid_cpc_applicant_feed_magic1 и есть вот такая хрень https://newprolab.com/ru/dataengineer-megafon, с которой непонятно, что делать. Вакансии не кажутся хоть сколько-нибудь надёжным источником, поскольку сейчас есть - завтра нет. Как у касперского. - - -- [Quantrum.Team](https://quantrum.me). 
Непонятное комьюнити трейдеров, один из которых решил попробовать ClickHouse. https://quantrum.me/1709-clickhouse-i-python-dlya-xraneniya-istorii-cen/ - -- https://severalnines.com/database-blog/hybrid-oltpanalytics-database-workloads-replicating-mysql-data-clickhouse какая-то компания типа альтинити видимо. - -- В презентациях есть Wikimedia Foundation, но реальных упоминаний об этом не нашел. Есть какой-то ответ в блоге https://www.mail-archive.com/analytics@lists.wikimedia.org/msg03619.html, но он не указывает прямо на использование ClickHouse. Вот этот вот указывает, но я с ходу не разобрал, что за источник вообще такой https://phabricator.wikimedia.org/T150343 - -- [Mercadona](mercadona.com) не нашел ни единой связи с ClickHouse. - -- [Zara](zara.com) не нашел связи с ClickHouse. - -- ByteDance, единственное прямое доказательство было ссылкой на уже не существующую вакансию. - -- Booking.com. Экспериментальная поддержка ClickHouse в https://github.com/bookingcom/carbonapi - -- [Roistat](https://roistat.com) разработала [Go ClickHouse Connector](https://github.com/roistat/go-clickhouse) и поддерживает свой [Docker](https://github.com/roistat/docker-clickhouse). - -- JD.com ClickHouse Meetup in Beijing 2019 -- KuaiShou ClickHouse Meetup in Beijing 2018 - -- JetBrains DataGrip ? https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/6_datagrip.pdf - [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) - - - - -- bdtc_2019 -+ cern (Mail.ru, MGID,) -- cpp_russia_2019 -- cpp_sprint_2019 -- data_at_scale -- database_saturday_2018 -- database_saturday_2018_2/pipeline -+ database_saturday_2018_2/octonica -- database_saturday_2019 -+ dataops_2019 (CARTO, Mercadona, Zara, Idealista, Corunet, ... 
Cloudflare, Spotify, Amadeus, Bloomberg, Cisco, Deutsche Bank, Tencent, ByteDance) -+ drafts (CloudFlare, Booking.com, Crobox, Rambler, QRator, СКБ Контур, Roistat, SMI2) -+ evolution (ByteDance, Sina, Tencent, JD.com, KuaiShou) -- group_by -- hash_tables -- highload2016 -- highload2017 -- highload2018 -- highload2019 -- highload_siberia_2018 -- highload_siberia_2019 -- highload_spb_2019 -- hse_2019 -- internals -- it_rzn_2019 -+ meetup10 -+ meetup11 -+ meetup12 -+ meetup13 -- meetup14 -+ meetup15 -- meetup16 -+ meetup17 -+ meetup18 -+ meetup19 -meetup20 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup21 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup22 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup23 Added presentations from ClickHouse Meetup in San Francisco 9 months ago -meetup24 yandex/ClickHouse -> ClickHouse/ClickHouse, part 2 5 months ago -meetup25 Add lost slides from Novosibirsk 8 months ago -meetup26 add one more slide deck from Minsk meetup 8 months ago -meetup27 Added more presentations from 27th meetup. 7 months ago -meetup28 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup29 Merge branch 'master' of github.com:ClickHouse/clickhouse-presentations 5 months ago -meetup3 Presentation: added analysts part [#CLICKHOUSE-2]. 
3 years ago -meetup30 Add more slides from Paris Meetup 4 months ago -meetup31 Correction on KuaiShou company name 3 months ago -meetup32 Correction on KuaiShou company name 3 months ago -meetup33 Correction on KuaiShou company name 3 months ago -meetup34 Add tokyo meetup 3 months ago -meetup35 Added half of presentations from ClickHouse Instanbul Meetup 3 months ago -meetup36/new_features Added a presentation from 36th ClickHouse Meetup 3 months ago -meetup37 Moved presentation from 37th ClickHouse Meetup 3 months ago -meetup38 Remaining Moscow meetup slides 3 months ago -meetup39 Remaining slides from SF meetup 14 days ago -meetup4 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup40 More slides from NYC meetup 19 days ago -meetup5 Added presentation from 5th meetup [#CLICKHOUSE-3]. 3 years ago -meetup6 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup7 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -meetup9 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -misc Added resized images for "evolution" article. 
3 years ago -percona2017 percona2017 3 years ago -percona2018 Added part of presentations from Percona 2018 and Sunnyvale Meetup 2 years ago -percona2019 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -percona_europe_2017 add presentation from Percona Europe 2017 2 years ago -percona_europe_2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -percona_europe_2019 Merge branch 'master' of github.com:ClickHouse/clickhouse-presentations 5 months ago -pgday2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -rit2017 add sources for RIT++2017 3 years ago -rit2018 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -rit2019 Add rit2019 presentation 8 months ago -roadmap2018 Added roadmap for 2018..2019 15 months ago -shad2017 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -tbd yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -tutorials Create catboost_with_clickhouse_en.md 2 years ago -unknown_developers_reissue Added slightly modified version of "Unknown developers" presentation 15 months ago -uwdc yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -yatalks_2019 yandex/ClickHouse -> ClickHouse/ClickHouse, part 1 5 months ago -yatalks_2019_moscow Fixed error in presentation from YaTalks 2019 in Moscow 3 months ago From 117845b2be10460952776a8c2b1a7307cd857517 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Fri, 6 Mar 2020 15:44:22 +0300 Subject: [PATCH 166/215] CLICKHOUSEDOCS-548: Repaired master build. 
--- .../operations/settings/query_complexity.md | 24 +++++++++++++++++-- docs/en/operations/settings/settings.md | 2 +- .../operations/settings/query_complexity.md | 4 ++-- docs/ru/operations/settings/settings.md | 2 +- docs/toc_en.yml | 1 + docs/toc_fa.yml | 1 + docs/toc_ja.yml | 4 ++-- docs/toc_ru.yml | 1 + docs/toc_zh.yml | 1 + 9 files changed, 32 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index e5f993540a7..e20d2fe926d 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -96,7 +96,7 @@ Maximum number of bytes before sorting. What to do if the number of rows received before sorting exceeds one of the limits: 'throw' or 'break'. By default, throw. -## max_result_rows +## max_result_rows {#setting-max_result_rows} Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. @@ -107,7 +107,27 @@ Limit on the number of bytes in the result. The same as the previous setting. ## result_overflow_mode What to do if the volume of the result exceeds one of the limits: 'throw' or 'break'. By default, throw. -Using 'break' is similar to using LIMIT. + + +Using 'break' is similar to using LIMIT. Break interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](settings.md#setting-max_block_size) and depends on [max_threads](settings.md#settings-max_threads). + +Пример: +```sql +SET max_threads = 3, max_block_size = 3333; +SET max_result_rows = 3334, result_overflow_mode = 'break'; + +SELECT * +FROM numbers_mt(100000) +FORMAT Null; +``` + +Результат: + +```text +6666 rows in set. ... 
+``` + + ## max_execution_time diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4928fba1ca8..6afba6f4fa4 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -406,7 +406,7 @@ Possible values: Default value: 1. -## max_block_size +## max_block_size {#setting-max_block_size} In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads, and to preserve at least some cache locality. diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index 3d0c8dd9c38..01ab8745840 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -97,7 +97,7 @@ Что делать, если количество строк, полученное перед сортировкой, превысило одно из ограничений: throw или break. По умолчанию: throw. -## max_result_rows +## max_result_rows {#setting-max_result_rows} Ограничение на количество строк результата. Проверяются также для подзапросов и на удалённых серверах при выполнении части распределённого запроса. @@ -109,7 +109,7 @@ Что делать, если объём результата превысил одно из ограничений: throw или break. По умолчанию: throw. -Использование break по смыслу похоже на LIMIT. Break прерывает выполнение только на уровне блока. Т.е. 
число строк которые вернет запрос будет больше чем ограничение [max_result_rows](#max_result_rows), кратно [max_block_size](settings.md#max_block_size) и зависит от [max_threads](settings.md#settings-max_threads). +Использование break по смыслу похоже на LIMIT. Break прерывает выполнение только на уровне блока. Т.е. число строк которые вернет запрос будет больше чем ограничение [max_result_rows](#setting-max_result_rows), кратно [max_block_size](settings.md#setting-max_block_size) и зависит от [max_threads](settings.md#settings-max_threads). Пример: ```sql diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 298dd7364c3..80486502dcb 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -377,7 +377,7 @@ Ok. Значение по умолчанию: 0. -## max_block_size +## max_block_size {#setting-max_block_size} Данные в ClickHouse обрабатываются по блокам (наборам кусочков столбцов). Внутренние циклы обработки для одного блока достаточно эффективны, но есть заметные издержки на каждый блок. Настройка `max_block_size` — это рекомендация, какой размер блока (в количестве строк) загружать из таблиц. Размер блока не должен быть слишком маленьким, чтобы затраты на каждый блок были заметны, но не слишком велики, чтобы запрос с LIMIT, который завершается после первого блока, обрабатывался быстро. Цель состоит в том, чтобы не использовалось слишком много оперативки при вынимании большого количества столбцов в несколько потоков; чтобы оставалась хоть какая-нибудь кэш-локальность. 
diff --git a/docs/toc_en.yml b/docs/toc_en.yml index fc8b9b99ee8..98c3e113fb6 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -75,6 +75,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'Generate': 'operations/table_engines/generate.md' - 'Database Engines': - 'Introduction': 'database_engines/index.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index 0e5604d1521..47197de62d9 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -109,6 +109,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'Generate': 'operations/table_engines/generate.md' - 'SQL Reference': - 'hidden': 'query_language/index.md' diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index fea1f8780ce..916459b0e04 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -79,7 +79,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' - + - 'Generate': 'operations/table_engines/generate.md' - 'SQL Reference': - 'hidden': 'query_language/index.md' - 'Syntax': 'query_language/syntax.md' @@ -143,7 +143,7 @@ nav: - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - 'generate': 'query_language/table_functions/generate.md' -- 'Dictionaries': + - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - 'External Dictionaries': - 'General Description': 'query_language/dicts/external_dicts.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 99af6d02545..60d73e0d075 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -80,6 +80,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 
'operations/table_engines/buffer.md' + - 'Generate': 'operations/table_engines/generate.md' - 'Справка по SQL': - 'hidden': 'query_language/index.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index bd5c4308bce..2fe4e9763b6 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -109,6 +109,7 @@ nav: - 'MaterializedView': 'operations/table_engines/materializedview.md' - 'Memory': 'operations/table_engines/memory.md' - 'Buffer': 'operations/table_engines/buffer.md' + - 'Generate': 'operations/table_engines/generate.md' - 'SQL语法': - 'hidden': 'query_language/index.md' From 0a41e5237964ff08e7424752ad6632f67f34520e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 6 Mar 2020 15:45:07 +0300 Subject: [PATCH 167/215] Update docs/en/introduction/adopters.md Co-Authored-By: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index e0d409d9cfc..d1c7a35cead 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -44,7 +44,7 @@ | [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | [QINGCLOUD](https://www.qingcloud.com/) | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| [Percent百分点](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| [Beijing PERCENT Information Technology Co., Ltd.](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | | [Rambler](https://rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | [Traffic Stars](https://trafficstars.com/) | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | From 6134a046cb1a20d3df527a0c3391e5ab98cf2673 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 6 Mar 2020 14:31:27 +0100 Subject: [PATCH 168/215] ignore weak symbol under mac because it must be defined --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c0d4cd5504d..2948225583c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,10 @@ if (COMPILER_CLANG) # TODO investigate that set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer") + if (OS_DARWIN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-U,_inside_main") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-U,_inside_main") + endif() endif () option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON) From b3a6f6b3676a8a22f4a73b22162e918fb2fb2eb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 21:14:33 +0300 Subject: [PATCH 169/215] Revert "Simplification" This reverts commit f5518c0c439f0156812ae3f3c5bb1423c5c46d84. 
--- dbms/programs/server/HTTPHandler.cpp | 10 ++-- dbms/src/Interpreters/Context.cpp | 83 ++++++++++++++++++++-------- dbms/src/Interpreters/Context.h | 7 ++- 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index b360b0a89f4..a4c59ff9e25 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -141,15 +141,15 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti } -static uint32_t parseSessionTimeout( +static std::chrono::steady_clock::duration parseSessionTimeout( const Poco::Util::AbstractConfiguration & config, const HTMLForm & params) { - uint32_t session_timeout = config.getInt("default_session_timeout", 60); + unsigned session_timeout = config.getInt("default_session_timeout", 60); if (params.has("session_timeout")) { - uint32_t max_session_timeout = config.getUInt("max_session_timeout", 3600); + unsigned max_session_timeout = config.getUInt("max_session_timeout", 3600); std::string session_timeout_str = params.get("session_timeout"); ReadBufferFromString buf(session_timeout_str); @@ -162,7 +162,7 @@ static uint32_t parseSessionTimeout( ErrorCodes::INVALID_SESSION_TIMEOUT); } - return session_timeout; + return std::chrono::seconds(session_timeout); } @@ -275,7 +275,7 @@ void HTTPHandler::processQuery( std::shared_ptr session; String session_id; - uint32_t session_timeout; + std::chrono::steady_clock::duration session_timeout; bool session_is_set = params.has("session_id"); const auto & config = server.config(); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index aa688ff9dd5..dbc963e0a27 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -124,7 +124,7 @@ public: std::shared_ptr acquireSession( const String & session_id, Context & context, - uint32_t timeout, + std::chrono::steady_clock::duration timeout, bool 
throw_if_not_found) { std::unique_lock lock(mutex); @@ -162,7 +162,7 @@ public: void releaseSession(NamedSession & session) { std::unique_lock lock(mutex); - close_times.emplace(time(nullptr) + session.timeout, session.key); + scheduleCloseSession(session, lock); } private: @@ -178,11 +178,31 @@ private: } }; + /// TODO it's very complicated. Make simple std::map with time_t or boost::multi_index. using Container = std::unordered_map, SessionKeyHash>; - using CloseTimes = std::multimap; - + using CloseTimes = std::deque>; Container sessions; CloseTimes close_times; + std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1); + std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now(); + UInt64 close_cycle = 0; + + void scheduleCloseSession(NamedSession & session, std::unique_lock &) + { + /// Push it on a queue of sessions to close, on a position corresponding to the timeout. + /// (timeout is measured from current moment of time) + + const UInt64 close_index = session.timeout / close_interval + 1; + const auto new_close_cycle = close_cycle + close_index; + + if (session.close_cycle != new_close_cycle) + { + session.close_cycle = new_close_cycle; + if (close_times.size() < close_index + 1) + close_times.resize(close_index + 1); + close_times[close_index].emplace_back(session.key); + } + } void cleanThread() { @@ -192,32 +212,51 @@ private: while (true) { - closeSessions(lock); - if (cond.wait_for(lock, std::chrono::seconds(1), [this]() -> bool { return quit; })) + auto interval = closeSessions(lock); + + if (cond.wait_for(lock, interval, [this]() -> bool { return quit; })) break; } } /// Close sessions, that has been expired. Returns how long to wait for next session to be expired, if no new sessions will be added. 
- void closeSessions(std::unique_lock &) + std::chrono::steady_clock::duration closeSessions(std::unique_lock & lock) { - time_t now = time(nullptr); - for (auto it = close_times.begin(); it != close_times.end();) + const auto now = std::chrono::steady_clock::now(); + + /// The time to close the next session did not come + if (now < close_cycle_time) + return close_cycle_time - now; /// Will sleep until it comes. + + const auto current_cycle = close_cycle; + + ++close_cycle; + close_cycle_time = now + close_interval; + + if (close_times.empty()) + return close_interval; + + auto & sessions_to_close = close_times.front(); + + for (const auto & key : sessions_to_close) { - if (it->first >= now) - break; + const auto session = sessions.find(key); - const auto session_it = sessions.find(it->second); - it = close_times.erase(it); - - if (session_it == sessions.end()) - continue; - - if (session_it->second.unique()) - sessions.erase(session_it); - else - close_times.emplace(now + session_it->second->timeout, session_it->second->key); /// Does not invalidate iterators. + if (session != sessions.end() && session->second->close_cycle <= current_cycle) + { + if (!session->second.unique()) + { + /// Skip but move it to close on the next cycle. 
+ session->second->timeout = std::chrono::steady_clock::duration{0}; + scheduleCloseSession(*session->second, lock); + } + else + sessions.erase(session); + } } + + close_times.pop_front(); + return close_interval; } std::mutex mutex; @@ -481,7 +520,7 @@ void Context::enableNamedSessions() shared->named_sessions.emplace(); } -std::shared_ptr Context::acquireNamedSession(const String & session_id, uint32_t timeout, bool session_check) +std::shared_ptr Context::acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) { if (!shared->named_sessions) throw Exception("Support for named sessions is not enabled", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 13053e4c8f2..5b5b8bdabd5 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -426,7 +426,7 @@ public: /// The method must be called at the server startup. void enableNamedSessions(); - std::shared_ptr acquireNamedSession(const String & session_id, uint32_t timeout, bool session_check); + std::shared_ptr acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check); /// For methods below you may need to acquire a lock by yourself. 
std::unique_lock getLock() const; @@ -672,11 +672,12 @@ using NamedSessionKey = std::pair; struct NamedSession { NamedSessionKey key; + UInt64 close_cycle = 0; Context context; - uint32_t timeout; + std::chrono::steady_clock::duration timeout; NamedSessions & parent; - NamedSession(NamedSessionKey key_, Context & context_, uint32_t timeout_, NamedSessions & parent_) + NamedSession(NamedSessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, NamedSessions & parent_) : key(key_), context(context_), timeout(timeout_), parent(parent_) { } From 42cc6f4e3c48da26483614c89cf1af87b8265057 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 16:29:48 -0400 Subject: [PATCH 170/215] Update settings.md max_insert_threads corrected ru description --- docs/ru/operations/settings/settings.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f71b24ff269..5f452a59888 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -552,7 +552,17 @@ log_query_threads=1 ## max_insert_threads {#settings-max_insert_threads} -Максимальное количество потоков для выполнения запроса `INSERT SELECT`. По умолчанию (0 - auto) определяется автоматически исходя из количества потоков данных которое выдает SELECT. Если параметр `max_insert_threads` задан (не 0), то будет использовано минимальное значение из заданного и автоматического. +Максимальное количество потоков для выполнения запроса `INSERT SELECT`. + +Возможные значения: + +- 0 (или 1) — `INSERT SELECT` не выполняется параллельно. +- Положительное целое число, больше 1. + +Значение по умолчанию: 0. + +Параллельный `INSERT SELECT` действует только в том случае, если часть SELECT выполняется параллельно, см. настройку [max_threads](#settings-max_threads). +Чем больше значение `max_insert_threads`, тем больше потребление оперативной памяти. 
## max_compress_block_size From 44b6390c62ec3e7a722ff7dec48a2163f82be78a Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 16:35:31 -0400 Subject: [PATCH 171/215] Update settings.md max_insert_threads EN description --- docs/en/operations/settings/settings.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4928fba1ca8..996af978643 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -569,6 +569,20 @@ For queries that are completed quickly because of a LIMIT, you can set a lower ' The smaller the `max_threads` value, the less memory is consumed. +## max_insert_threads {#settings-max_insert_threads} + +The maximum number of threads to execute the `INSERT SELECT` query. + +Possible values: + +- 0 (or 1) — `INSERT SELECT` is not executed in parallel. +- Positive integer greater than 1. + +Default value: 0. + +Parallel `INSERT SELECT` has effect only if the SELECT part is run in parallel, see [max_threads](#settings-max_threads) setting. +Higher values will lead to higher memory usage. + ## max_compress_block_size The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn't any reason to change this setting.
From a5e4381a98dcc84a7fe1373fa5f728dcd2418a10 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 16:42:17 -0400 Subject: [PATCH 172/215] Update settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 996af978643..dd673ca0212 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -580,7 +580,7 @@ Possible values: Default value: 0. -Parallel `INSERT SELECT` has effect only if the SELECT part is run in parallel, see [max_threads](#settings-max_threads) setting. +Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#settings-max_threads) setting. Higher values will lead to higher memory usage. ## max_compress_block_size From 55fdce72eb3876ee493407d70e9a5d03f86b3213 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Mar 2020 23:50:34 +0300 Subject: [PATCH 173/215] Update roadmap --- docs/ru/extended_roadmap.md | 118 ++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 45 deletions(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 0c5d57c54d8..91e969c10ca 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -16,7 +16,7 @@ Задача "normalized z-Order curve" в перспективе может быть полезна для БК и Метрики, так как позволяет смешивать OrderID и PageID и избежать дублирования данных. В задаче также вводится способ индексации путём обращения функции нескольких аргументов на интервале, что имеет смысл для дальнейшего развития. -Изначально делал [Андрей Чулков](https://github.com/achulkov2), ВШЭ, теперь доделывает [Ольга Хвостикова](https://github.com/stavrolia), но сроки немного сдвинуты из-за задачи 25.9. Будем надеятся на реализацию к концу ноября. 
Впрочем, [Андрей Чулков](https://github.com/achulkov2) скоро сможет помочь её доделать. +Изначально делал [Андрей Чулков](https://github.com/achulkov2), ВШЭ, теперь (не) доделывает [Ольга Хвостикова](https://github.com/stavrolia), но сроки немного сдвинуты из-за задачи 25.9. Будем надеяться на лучшее. Upd. Доделывать будет другой человек. Приоритет не высокий. @@ -32,6 +32,8 @@ Q1. Делает [Александр Токмаков](https://github.com/tavplu Q1. И полностью immutable куски. Делает [Александр Сапин](https://github.com/alesapin). Готов приступить к задаче в конце ноября 2019. Нужно для Яндекс.Метрики. +Upd. Большая часть задачи реализована и добавлена в master. Есть незначительные технические долги. Остаётся реализация неблокирующего изменения метаданных таблицы. + ### 1.4. Нетранзитивные ALTER столбцов. Требует 1.3. Будет делать [Александр Сапин](https://github.com/alesapin). @@ -45,6 +47,8 @@ Q1. И полностью immutable куски. Делает [Александр Компактные куски - Q1, куски в оперативке Q1/Q2. +Компактные куски реализованы, ещё не включены по умолчанию. Первым шагом включаем по умолчанию для системных таблиц. + Делает [Антон Попов](https://github.com/CurtizJ), первый рабочий вариант в декабре. Пререквизит чтобы снизить сложность мелких INSERT, что в свою очередь нужно для 1.12, иначе задача 1.12 не сможет нормально работать. Особенно нужно для Яндекс.Облака. Данные в таблицах типа MergeTree в ClickHouse хранятся в виде набора независимых "кусков". Внутри куска, каждый столбец, а также индекс, хранится в отдельных файлах. Это сделано для возможности быстрых манипуляций со столбцами (пример - запрос ALTER DROP COLUMN). При вставке данных (INSERT), создаётся новый кусок. Для таблиц с большим количеством столбцов, запросы INSERT с маленьким количеством строк являются неэффективными, так как требуют создания большого количества файлов в файловой системе. Это является врождённой особенностью ClickHouse - одной из первой проблем, с которыми сталкиваются пользователи.
Пользователям приходится буферизовывать данные и собирать их в более крупные пачки перед вставкой в ClickHouse. @@ -57,11 +61,12 @@ Q1. И полностью immutable куски. Делает [Александр Требует 1.6. -### 1.8. Перенос между разделами по TTL. +### 1.8. + Перенос между разделами по TTL. Делает [Владимир Чеботарёв](https://github.com/excitoon), Altinity. Декабрь 2019. Q1. Закоммичено, но есть технический долг, который исправляется сейчас. +Готово. ### 1.9. Использование TTL для прореживания данных. @@ -95,6 +100,8 @@ Q1. Закоммичено, но есть технический долг, ко ### 1.11. Виртуальная файловая система. +В процессе реализации, сейчас на VFS переведены Log, TinyLog, StripeLog, готовится MergeTree. + Q2. Нужно для Яндекс.Облака. Делает Александр, Яндекс.Облако, а также Олег Ершов, ВШЭ и Яндекс. @@ -110,6 +117,8 @@ Q2. Нужно для Яндекс.Облака. Требует 1.11. Желательно 1.6 и 1.18. Делает Александр, Яндекс.Облако (сначала часть для S3), а также Олег Ершов, ВШЭ и Яндекс. +Upd. Олег будет делать только часть про HDFS. + ### 1.13. Ускорение запросов с FINAL. Требует 2.1. Делает [Николай Кочетов](https://github.com/KochetovNicolai). Нужно для Яндекс.Метрики. @@ -174,6 +183,8 @@ Upd. Включили по-умолчанию. Удаление старого В очереди. Нужно для YQL. +Александр Токмаков исправил множество проблем с использованием Context и сейчас переносит каталог БД наружу. + ### 2.8. Декларативный парсер запросов. Средний приоритет. Нужно для YQL. @@ -277,6 +288,8 @@ Upd. Иван Блинков сделал эту задачу путём зам [#9075](https://github.com/ClickHouse/ClickHouse/pull/9075) Q1. Нужно для Метрики, в очереди. Никита Михайлов. +Upd. Задача на финальной стадии разработки. + ### 5.2. Автонастройка лимита на оперативку и размера кэшей. ### 5.3. + Встроенная ручка для Prometheus. @@ -288,20 +301,20 @@ Q1. Нужно для Метрики, в очереди. Никита Михай ### 5.5. + LTS релизы. Требует 7.5. Задачу хочет Метрика, Облако, БК, Маркет и Altinity. Первой LTS версией уже стала версия 19.14. 
-Метрика, БК, Маркет уже используют более свежие версии чем LTS. +Метрика, БК, Маркет, Altinity уже используют более свежие версии чем LTS. ## 6. Инструментирование. -### 6.1. Исправления сэмплирующего профайлера запросов. +### 6.1. + Исправления сэмплирующего профайлера запросов. Михаил Филимонов, Altinity. Ноябрь 2019. Сделано. -Осталось ещё проверить работоспособность профайлера в первом потоке (что важно для INSERT). Иван Лежанкин. Q1. +Осталось ещё проверить работоспособность профайлера в первом потоке (что важно для INSERT). Иван Лежанкин. Q1. Сделано. -### 6.2. Добавление memory profiler. +### 6.2. + Добавление memory profiler. [#6387](https://github.com/ClickHouse/ClickHouse/issues/6387) -Сравнительно простая задача, но только для опытных разработчиков. Нужна всем. Иван Лежанкин. Q1. +Сравнительно простая задача, но только для опытных разработчиков. Нужна всем. Иван Лежанкин. Q1. Сделано. ### 6.3. Учёт оперативки total расширить не только на запросы. @@ -313,6 +326,8 @@ Q1. Нужно для Метрики, в очереди. Никита Михай В Linux существует возможность получать в программе информацию о счётчиках производительности и событиях, относящихся к CPU и ядру ОС. Подробнее смотрите `man perf_event_open`. Предлагается добавить эти метрики в ClickHouse для инструментирования запросов. +Есть прототип. + ### 6.5. Эксперименты с LLVM X-Ray. Требует 2.2. @@ -357,9 +372,9 @@ Q1. Нужно для Метрики, в очереди. Никита Михай [Александр Сапин](https://github.com/alesapin). -### 7.6. Правильный статистический тест для comparison mode в clickhouse-performance-test. +### 7.6. + Правильный статистический тест для comparison mode в clickhouse-performance-test. -Задачу начал делать Дмитрий Рубашкин (ВШЭ). Сейчас продолжает [Александр Кузьменков](https://github.com/akuzm). +Задачу начал делать Дмитрий Рубашкин (ВШЭ). Сейчас продолжает [Александр Кузьменков](https://github.com/akuzm). Сделано, работает в CI. ### 7.7. Доделать тесты под MSan. @@ -441,15 +456,13 @@ Upd. 
В рамках данной задачи добавляем подстве [Александр Сапин](https://github.com/alesapin). Может делегировать эту задачу кому угодно. Upd. Сделано всё кроме инструкции на сайте. Для этого требуется создать директории testing/stable/prestable на repo.yandex.ru. Внезапно оказалось, что человек, отвечающий за это, в отпуске, и он не отвечает на вопрос, кто его заместитель. Q1. -### 7.18.1. Доделать бинарники под Mac. +### 7.18. + Доделать бинарники под Mac. Уже есть автосборка бинарников под Mac на каждый коммит и PR, но с недостатками. [Иван Лежанкин](https://github.com/abyss7). Требует 7.1, 7.2. Рекомендуется 7.14. Сейчас не хватает по крайней мере SSL и ICU. Нужно для Яндекс.Облака. Upd. Сделано SSL. Ориентируемся в Q1, но приоритет средний и может потеряться. -Not ready. There is no instruction on the main https://clickhouse.tech/ page neither in documentation. - -### 7.18. Поместить ссылку на собранные бинарники под Mac на сайт. +### 7.18.1. Поместить ссылку на собранные бинарники под Mac на сайт. Сейчас людям приходится делать несколько кликов, чтобы их скачать. [Иван Лежанкин](https://github.com/abyss7) или [Александр Сапин](https://github.com/alesapin). @@ -598,7 +611,9 @@ UPD: Все патчи Максима отправлены в master. Задач ### 8.1. Поддержка ALTER MODIFY SETTING для Kafka. -Altinity. +Также - возможность указать все настройки форматов в Kafka. + +Altinity. Никто не делает эту задачу. ### 8.2. Поддержка Mongo Atlas URI. @@ -618,7 +633,7 @@ Altinity. ### 8.6. Kerberos аутентификация для HDFS и Kafka. -Андрей Коняев, ArenaData. +Андрей Коняев, ArenaData. Он куда-то пропал. ### 8.7. + Исправление мелочи HDFS на очень старых ядрах Linux. @@ -661,7 +676,7 @@ Andrew Onyshchuk. Есть pull request. Q1. Сделано. Формат Apache Avro является компактным структурированным построчным бинарным форматом данных с внешней схемой. 
Этот формат часто используется совместно с Kafka и поддержка его в качестве одного из форматов ввода-вывода в ClickHouse является востребованной пользователями. -### 8.16.1. Поддержка формата JSONEachRow, засунутого в массив. +### 8.16.1. + Поддержка формата JSONEachRow, засунутого в массив. Павел Круглов, ВШЭ и Яндекс. Есть pull request. @@ -676,6 +691,7 @@ Andrew Onyshchuk. Есть pull request. Q1. Сделано. ### 8.16.4. Формат Regexp. Павел Круглов, ВШЭ и Яндекс. +Есть pull request. ### 8.17. ClickHouse как MySQL реплика. @@ -697,6 +713,8 @@ Maxim Fedotov, Wargaming + Yuri Baranov, Яндекс. Следующей по востребованности является система очередей RabbitMQ. Её поддержка в ClickHouse отсутствует. +Есть pull request в процессе разработки. + ### 8.20. Интеграция с SQS. Низкий приоритет. @@ -705,11 +723,14 @@ Maxim Fedotov, Wargaming + Yuri Baranov, Яндекс. Нужно для БК. Декабрь 2019. В декабре для БК сделан минимальный вариант этой задачи. +Максимальный вариант, вроде, никому не нужен. ### 8.22. Поддержка синтаксиса для переменных в стиле MySQL. При парсинге запроса преобразовывать синтаксис вида `@@version_full` в вызов функции `getGlobalVariable('version_full')`. Поддержать популярные MySQL переменные. Может быть поможет Юрий Баранов, если будет энтузиазм. +Upd. Юрий Баранов работает в Google, там запрещено разрабатывать ClickHouse. + ### 8.23. Подписка для импорта обновляемых и ротируемых логов в ФС. Желательно 2.15. @@ -745,6 +766,8 @@ ClickHouse предоставляет возможность обратитьс Код YT использует SIGILL вместо abort. Это, опять же, происходит при учениях. Нужно для БК и Метрики. Поиск причин - [Александр Сапин](https://github.com/alesapin). Дальшейшее исправление возможно на стороне YT. +Upd. Одну причину устранили, но ещё что-то неизвестное осталось. + ### 10.3. Возможность чтения данных из статических таблиц в YT словарях. Нужно для БК и Метрики. @@ -757,6 +780,8 @@ ClickHouse предоставляет возможность обратитьс Нужно для Метрики. 
+Для MySQL сделал Clément Rodriguez. + ### 10.6. Словари из Cassandra и Couchbase. ### 10.7. Поддержка Nullable в словарях. @@ -769,13 +794,13 @@ ClickHouse предоставляет возможность обратитьс ### 10.9. Уменьшение блокировок для cache словарей за счёт одновременных запросов одного и того же. -Никита Михайлов. Q1. Нужно для БК, но мотивация задачи находится под вопросом, так как есть рабочее предположение о том, что данная задача не устраняет причину проблемы. +Заменено в пользу 10.10, 10.11. -### 10.10. Возможность использования старых значений из cache словаря пока они перезапрашиваются. +### 10.10. + Возможность использования старых значений из cache словаря пока они перезапрашиваются. Никита Михайлов. Q1. Нужно для БК и Метрики. -### 10.11. Возможность исключительно асинхронных запросов в cache словарях. +### 10.11. + Возможность исключительно асинхронных запросов в cache словарях. Никита Михайлов. Q1. Нужно для БК и Метрики. Требует 10.10. @@ -817,7 +842,7 @@ ClickHouse предоставляет возможность обратитьс ### 11.3. Интеграционные тесты ODBC драйвера путём подключения ClickHouse к самому себе через ODBC. -Денис Глазачев, Altinity. +Михаил Филимонов, Altinity. Есть почти готовый pull request. ### 11.4. Исправление упячек с типами Date и Decimal в clickhouse-cpp. @@ -853,10 +878,10 @@ zhang2014, есть pull request. [Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в начале февраля 2019. Q1. Сейчас сделаны все интерфейсы в коде и запросы, но не сделаны варианты хранения прав кроме прототипа. -### 12.2. Управление пользователями и правами доступа с помощью SQL запросов. +### 12.2. + Управление пользователями и правами доступа с помощью SQL запросов. [Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в декабре 2019. -Q1. +Q1. Сделано управление правами полностью, но не реализовано их хранение, см. 12.1. ### 12.3. Подключение справочника пользователей и прав доступа из LDAP. 
@@ -890,6 +915,7 @@ Q1/Q2. Требует 13.2 или сможем сделать более неудобную реализацию раньше. Обсуждается вариант неудобной реализации. Пока средний приоритет, целимся на Q1/Q2. +Вариант реализации выбрал Александр Казаков. ## 14. Диалект SQL. @@ -926,8 +952,6 @@ zhang2014 ### 14.8. Модификаторы DISTINCT, ORDER BY для агрегатных функций. -Софья Борзенкова, ВШЭ. - В ClickHouse поддерживается вычисление COUNT(DISTINCT x). Предлагается добавить возможность использования модификатора DISTINCT для всех агрегатных функций. Например, AVG(DISTINCT x) - вычислить среднее значение для всех различных значений x. Под вопросом вариант, в котором фильтрация уникальных значений выполняется по одному выражению, а агрегация по другому. Результат некоторых агрегатных функций зависит от порядка данных. Предлагается реализовать модификатор ORDER BY, задающий порядок явно. Пример: groupArray(x ORDER BY y, z). @@ -992,7 +1016,7 @@ zhang2014. Артём Зуйков. Сейчас merge JOIN включается вручную опцией и всегда замедляет запросы. Хотим, чтобы он замедлял запросы только когда это неизбежно. Кстати, смысл merge JOIN появляется только совместно с 15.2 и 15.3. -Q1. +Q1. Сделали адаптивный вариант, но вроде он что-то всё-ещё замедляет. ### 15.1.1. Алгоритм two-level merge JOIN. @@ -1054,6 +1078,7 @@ ClickHouse не является geospatial СУБД. Тем не менее, в Похожая, но более сложная задача, которую ClickHouse пока не умеет решать - определение полигона среди множества полигонов, в которые попадают точки. Для примера: определение района города по географическим координатам. Для решения этой задачи нужно будет реализовать поддержку словарей с полигонами, в которых данные проиндексированы для быстрого поиска. Upd. Андрей сделал прототип интерфейса и реализацию-заглушку внутри него. +Upd. Андрей сделал прототип более оптимальной структуры данных. ### 17.2. GIS типы данных и операции. @@ -1086,6 +1111,8 @@ Upd. 
Андрей сделал прототип интерфейса и реал Предлагается реализовать в ClickHouse статистические тесты (Analysis of Variance, тесты нормальности распределения и т. п.) в виде агрегатных функций. Пример: `welchTTest(value, sample_idx)`. +Сделали прототип одного теста, есть pull request. + ### 18.3. Инфраструктура для тренировки моделей в ClickHouse. В очереди. Возможно, Александр Кожихов. У него сначала идёт задача 24.26. @@ -1115,9 +1142,11 @@ Upd. Андрей сделал прототип интерфейса и реал В прошлом году, Алексей добавил модельную реализацию (mock) интерфейса ZooKeeper для тестирования. Сейчас предлагается сделать реализацию поверх Etcd, а также расширить возможности тестовой реализации. +Upd. Алексей сделал какой-то вариант, но борется с тем, что ничего не работает. + ### 19.3. Подключение YT Cypress или YDB как альтернативы ZooKeeper. -Hold. Полезно для заказчиков внутри Яндекса, но есть риски. +Hold. Полезно для заказчиков внутри Яндекса, но есть риски. Эту задачу никто не будет делать. ### 19.4. internal_replication = 'auto'. @@ -1163,7 +1192,7 @@ Hold. Полезно для заказчиков внутри Яндекса, н Начинал Олег Ершов, доделывает Никита Михайлов, помогает [Александр Кузьменков](https://github.com/akuzm). Готово. -### 21.1.1. Избавление от лишнего копирование при параллельном парсинге форматов, если возможен mmap файла целиком. +### 21.1.1. Избавление от лишнего копирования при параллельном парсинге форматов, если возможен mmap файла целиком. ### 21.2. Параллельное форматирование форматов. @@ -1173,6 +1202,8 @@ Hold. Полезно для заказчиков внутри Яндекса, н Нужно всем (Zen, БК, DataLens, TestEnv...). Антон Попов, Q1/Q2. +Upd. Антон делает эту задачу. Большая часть уже реализована. + ### 21.4. Использование ORDER BY ключа для оптимизации GROUP BY и DISTINCT. Дмитрий Рубашкин, ВШЭ. Помогает Антон Попов. @@ -1183,7 +1214,7 @@ Hold. Полезно для заказчиков внутри Яндекса, н В прошлом году, аналогичное решение сделали для операции ORDER BY. -### 21.5. 
Распараллеливание INSERT при INSERT SELECT, если это необходимо. +### 21.5. + Распараллеливание INSERT при INSERT SELECT, если это необходимо. [Vxider](https://github.com/Vxider), ICT Есть pull request. @@ -1193,7 +1224,7 @@ Hold. Полезно для заказчиков внутри Яндекса, н ### 21.7. Кэш результатов запросов. [Achimbab](https://github.com/achimbab). -Есть pull request. +Есть pull request. Но это не совсем то. ### 21.8. Взаимная интеграция аллокатора и кэша. @@ -1248,8 +1279,6 @@ Amos Bird. ### 21.14. Оптимизация запросов с помощью constraints. -Мария Нефедова, ВШЭ. - Constraints позволяют задать выражение, истинность которого проверяется при вставке данных в таблицу. Предположение о том, что выражение истинно, может использоваться и для оптимизации запросов. Например, встретив в запросе точно такое же выражение, можно заменить его на константу 1. Если выражение содержит равенство, то встретив в запросе одну из частей равенства, её можно заменить на другую часть равенства, если это сделает проще чтение данных или вычисление выражения. Например, задан constraint: `URLDomain = domain(URL)`. Значит, выражение `domain(URL)` можно заменить на `URLDomain`. @@ -1334,9 +1363,11 @@ N.Vartolomei. ### 22.5. + Исправление редких срабатываний TSan в stress тестах в CI. -Александр Казаков. +Александр Казаков сделал эту задачу. -### 22.6. Изменение только DEFAULT в ALTER TABLE может поменять тип столбца. +### 22.6. + Изменение только DEFAULT в ALTER TABLE может поменять тип столбца. + +Александр Сапин сделал эту задачу. ### 22.7. + Row-Level Security не работает в случае наличия в запросе IN подзапросов. @@ -1371,7 +1402,7 @@ N.Vartolomei. Изначально было назначено на [Ивана Лежанкина](https://github.com/abyss7), но в результате сделал Александр Сапин. -### 22.14. Посмотреть, почему не работает StorageSet для MergeTree таблиц при некоторых условиях. +### 22.14. + Посмотреть, почему не работает StorageSet для MergeTree таблиц при некоторых условиях. 
Вроде бы сделал Никита Михайлов - проверить существующие issues на эту тему. @@ -1502,8 +1533,6 @@ Q1. [Николай Кочетов](https://github.com/KochetovNicolai). ### 24.2. Экспериментальные алгоритмы сжатия. -Анастасия Наумова, ВШЭ. - ClickHouse поддерживает LZ4 и ZSTD для сжатия данных. Эти алгоритмы являются парето-оптимальными по соотношению скорости и коэффициентам сжатия среди достаточно известных. Тем не менее, существуют менее известные алгоритмы сжатия, которые могут превзойти их по какому-либо критерию. Из потенциально более быстрых по сравнимом коэффициенте сжатия: Lizard, LZSSE, density. Из более сильных: bsc и csc. Необходимо изучить эти алгоритмы, добавить их поддержку в ClickHouse и исследовать их работу на тестовых датасетах. ### 24.3. Экспериментальные кодеки. @@ -1514,19 +1543,14 @@ ClickHouse поддерживает LZ4 и ZSTD для сжатия данных Внедрить их в ClickHouse в виде кодеков и изучить их работу на тестовых датасетах. -### 24.4. Шифрование в ClickHouse на уровне кусков данных. - -Yuchen Dong, ICT. +### 24.4. Шифрование в ClickHouse на уровне VFS. Данные в ClickHouse хранятся без шифрования. При наличии доступа к дискам, злоумышленник может прочитать данные. Предлагается реализовать два подхода к шифрованию: -1. Шифрование блоков данных. -Шифрование данных столбцов на диске требуется реализовать в виде кодеков. Это позволит применять шифрование к отдельным столбцам; применять его после сжатия данных (эффективно, но менее безопасно) или без сжатия. Потребуется проработать работу с ключами: получение ключей из отдельного сервиса, правильная работа с ключами в оперативке. Отдельным вопросом стоит шифрование индексов. +1. Шифрование на уровне VFS. ### 24.5. Поддержка функций шифрования для отдельных значений. -Yuchen Dong, ICT. - Смотрите также 24.5. 2. Шифрование отдельных значений. 
@@ -1608,6 +1632,8 @@ ClickHouse предоставляет достаточно богатый наб В компании nVidia сделали прототип offloading вычисления GROUP BY с некоторыми из агрегатных функций в ClickHouse и обещат предоставить исходники в публичный доступ для дальнейшего развития. Предлагается изучить этот прототип и расширить его применимость для более широкого сценария использования. В качестве альтернативы, предлагается изучить исходные коды системы `OmniSci` или `Alenka` или библиотеку `CUB` https://nvlabs.github.io/cub/ и применить некоторые из алгоритмов в ClickHouse. +Upd. В компании nVidia выложили прототип, теперь нужна интеграция в систему сборки. + ### 24.13. Stream запросы. Пререквизит для ClickHouse как CEP-системы. @@ -1769,7 +1795,7 @@ Amos Bird, но его решение слишком громоздкое и п ### 25.11. Митапы зарубежные: восток США (Нью Йорк, возможно Raleigh), возможно северо-запад (Сиэтл), Китай (Пекин снова, возможно митап для разработчиков или хакатон), Лондон. -[Иван Блинков](https://github.com/blinkov/) - организация. Две штуки в США запланированы. +[Иван Блинков](https://github.com/blinkov/) - организация. Две штуки в США запланированы. Upd. Два митапа в США и один в Европе проведены. ### 25.12. Статья "научная" - про устройство хранения данных и индексов или whitepaper по архитектуре. Есть вариант подать на VLDB. @@ -1800,6 +1826,8 @@ Amos Bird, но его решение слишком громоздкое и п Требуется проработать вопрос безопасности и изоляции инстансов (поднятие в контейнерах с ограничениями по сети), подключение тестовых датасетов с помощью copy-on-write файловой системы; органичения ресурсов. +Есть минимальный прототип. + ### 25.17. Взаимодействие с ВУЗами: ВШЭ, УрФУ, ICT Beijing. Алексей Миловидов и вся группа разработки @@ -1840,4 +1868,4 @@ UPD: не участвуем. ### 25.27. Обновить сайт ClickHouse. -Иван Блинков. Есть риски. +Иван Блинков. Нет рисков. Нужно для Яндекс.Облака. 
From 5db4222ff886f7070571fc5657fe0a4e9b4589e2 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 19:25:22 -0400 Subject: [PATCH 174/215] Update settings.md insert_deduplicate , force_deduplicate_childrens RU description --- docs/ru/operations/settings/settings.md | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 80486502dcb..ee7dc4e0654 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -842,6 +842,34 @@ ClickHouse генерирует исключение - [insert_quorum](#settings-insert_quorum) - [insert_quorum_timeout](#settings-insert_quorum_timeout) +## insert_deduplicate {#settings-insert_deduplicate} + +Включает и выключает дедупликацию для запросов `INSERT` (для Replicated* таблиц). + +Возможные значения: + +- 0 — выключена. +- 1 — включена. + +Значение по умолчанию: 1. + +По умолчанию блоки, вставляемые в реплицируемые таблицы оператором `INSERT`, дедуплицируются (см. [Репликация данных](../table_engines/replication.md)). + +## force_deduplicate_childrens {#settings-force_deduplicate_childrens} + +Включает и выключает проверку дедупликации для материализованных представлений, которые получают данные из Replicated* таблиц. + +Возможные значения: + +- 0 — выключена. +- 1 — включена. + +Значение по умолчанию: 0. + +По умолчанию проверка дедупликации у материализованных представлений не производится, а наследуется от Replicated* (основной) таблицы, за которой "следит" материализованное представление. +Т.е. если `INSERT` в основную таблицу д.б. пропущен (сдедуплирован), то автоматически не будет вставки и в материализованные представления.
Это имплементировано для того, чтобы работали материализованные представления, которые сильно группируют данные основных `INSERT`, до такой степени что блоки вставляемые в материализованные представления получаются одинаковыми для разных `INSERT` в основную таблицу. +Одновременно это "ломает" идемпотентность вставки в материализованные представления. Т.е. если `INSERT` был успешен в основную таблицу и неуспешен в таблицу материализованного представления (напр. из-за сетевого сбоя при коммуникации с Zookeeper), клиент получит ошибку и попытается повторить `INSERT`. Но вставки в материализованные представления произведено не будет, потому что дедупликация сработает на основной таблице. Настройка `force_deduplicate_childrens` позволяет это изменить. Т.е. при повторном `INSERT` будет произведена дедупликация на таблице материализованного представления, и повторный инсерт вставит данные в таблицу материализованного представления, которые не удалось вставить из-за сбоя первого `INSERT`. + ## count_distinct_implementation {#settings-count_distinct_implementation} Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count). 
From 506575a8f79f1d8bcfcf5fe395327205a0e5d934 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Mar 2020 04:04:41 +0300 Subject: [PATCH 175/215] Update simple_join_query.xml --- dbms/tests/performance/simple_join_query.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/simple_join_query.xml b/dbms/tests/performance/simple_join_query.xml index 919cce33be6..8ef3d97460d 100644 --- a/dbms/tests/performance/simple_join_query.xml +++ b/dbms/tests/performance/simple_join_query.xml @@ -16,7 +16,7 @@ INSERT INTO join_table SELECT number AS A, toString(arrayMap(x->x, range(100))) S0, S0 AS S1, S0 AS S2, S0 AS S3 from numbers(500000) SELECT COUNT() FROM join_table LEFT JOIN join_table USING A - SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) right USING A + SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) AS right USING A SELECT COUNT() FROM join_table AS left LEFT JOIN join_table AS right ON left.A = right.A SELECT COUNT() FROM join_table AS left LEFT JOIN (SELECT A FROM join_table) AS right ON left.A = right.A From 5f55526b37e219effcbb7aef60d12f0e3f975ad3 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 21:14:36 -0400 Subject: [PATCH 176/215] Update query_complexity.md explanation that `max_result_rows result_overflow_mode=break ` depends of max_block_size --- .../operations/settings/query_complexity.md | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index e5f993540a7..0b09599b8ae 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -107,7 +107,25 @@ Limit on the number of bytes in the result. The same as the previous setting. ## result_overflow_mode What to do if the volume of the result exceeds one of the limits: 'throw' or 'break'. By default, throw. 
-Using 'break' is similar to using LIMIT. + +Using 'break' is similar to using LIMIT. `Break` interrupts execution only at the block level. The query will return more rows than the limit [max_result_rows](#max_result_rows), multiple of [max_block_size](settings.md#max_block_size) and depends of [max_threads](settings.md#settings-max_threads). + +Example: + +```sql +SET max_threads = 3, max_block_size = 3333; +SET max_result_rows = 3334, result_overflow_mode = 'break'; + +SELECT * +FROM numbers_mt(100000) +FORMAT Null; +``` + +Result: + +```text +6666 rows in set. ... +``` ## max_execution_time From 6062485ed2bbed7c63311e5db1be07f085ab67e2 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 6 Mar 2020 21:25:23 -0400 Subject: [PATCH 177/215] Update query_complexity.md --- docs/en/operations/settings/query_complexity.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 01e59057c16..5b9db828d03 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -108,7 +108,7 @@ Limit on the number of bytes in the result. The same as the previous setting. What to do if the volume of the result exceeds one of the limits: 'throw' or 'break'. By default, throw. -Using 'break' is similar to using LIMIT. `Break` interrupts execution only at the block level. The query will return more rows than the limit [max_result_rows](#max_result_rows), multiple of [max_block_size](settings.md#max_block_size) and depends of [max_threads](settings.md#settings-max_threads). +Using 'break' is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](settings.md#setting-max_block_size) and depends on [max_threads](settings.md#settings-max_threads). 
Example: From c07cb384233bc0afc7b6e882bc5d23f0ce95ab3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 06:19:28 +0300 Subject: [PATCH 178/215] Fixed error --- dbms/tests/performance/if_to_multiif.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/performance/if_to_multiif.xml b/dbms/tests/performance/if_to_multiif.xml index 03f528d6349..e1c45bbb69e 100644 --- a/dbms/tests/performance/if_to_multiif.xml +++ b/dbms/tests/performance/if_to_multiif.xml @@ -11,14 +11,14 @@ nonexistent_table_if_multiif - + - From e33592688296b4830cbb3251568d2ffa79117ba0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 06:28:03 +0300 Subject: [PATCH 179/215] Check XML validity --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 4734788b90e..471488287ab 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -48,3 +48,6 @@ find $ROOT_PATH/{dbms,base} -name '*.h' -or -name '*.cpp' | xargs grep -l -P 'Er # Three or more consecutive empty lines find $ROOT_PATH/{dbms,base} -name '*.h' -or -name '*.cpp' | while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done + +# Broken XML files (requires libxml2-utils) +find $ROOT_PATH/{dbms,base} -name '*.xml' | xargs xmllint --noout --nonet From 643367642c20b63c45ea7198e4457727d831e956 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 06:52:50 +0300 Subject: [PATCH 180/215] Fixed race condition on queue_task_handle --- .../MergeTree/BackgroundProcessingPool.cpp | 14 +++++++++++--- .../Storages/MergeTree/BackgroundProcessingPool.h | 7 ++++++- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 11 +++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp 
b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index 64a179a8159..d922d9bd302 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -60,19 +60,27 @@ BackgroundProcessingPool::BackgroundProcessingPool(int size_, } -BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Task & task) +BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::createTask(const Task & task) { - TaskHandle res = std::make_shared(*this, task); + return std::make_shared(*this, task); +} +void BackgroundProcessingPool::startTask(const TaskHandle & task) +{ Poco::Timestamp current_time; { std::unique_lock lock(tasks_mutex); - res->iterator = tasks.emplace(current_time, res); + task->iterator = tasks.emplace(current_time, task); } wake_event.notify_all(); +} +BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Task & task) +{ + TaskHandle res = createTask(task); + startTask(res); return res; } diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 200cd434b2f..526cab0800e 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -82,9 +82,14 @@ public: return size; } - /// The task is started immediately. + /// Create task and start it. TaskHandle addTask(const Task & task); + /// Create task but not start it. + TaskHandle createTask(const Task & task); + /// Start the task that was created but not started. Precondition: task was not started. 
+ void startTask(const TaskHandle & task); + void removeTask(const TaskHandle & task); ~BackgroundProcessingPool(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 0ed2527a981..052e218f30b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2902,9 +2902,16 @@ void StorageReplicatedMergeTree::startup() /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attmept to do it startup_event.wait(); - queue_task_handle = global_context.getBackgroundPool().addTask([this] { return queueTask(); }); + /// If we don't separate create/start steps, race condition will happen + /// between the assignment of queue_task_handle and queueTask that use the queue_task_handle. + queue_task_handle = global_context.getBackgroundPool().createTask([this] { return queueTask(); }); + queue_task_handle->startTask(); + if (areBackgroundMovesNeeded()) - move_parts_task_handle = global_context.getBackgroundMovePool().addTask([this] { return movePartsTask(); }); + { + move_parts_task_handle = global_context.getBackgroundMovePool().createTask([this] { return movePartsTask(); }); + move_parts_task_handle->startTask(); + } } From bfb502cf584227b8c8aaf31d6c10e296045ed110 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 07:00:24 +0300 Subject: [PATCH 181/215] Fix race condition in executeMetadataAlter --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 0ed2527a981..db2ffb415df 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3174,7 +3174,6 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer auto columns_from_entry = 
ColumnsDescription::parse(entry.columns_str); auto metadata_from_entry = ReplicatedMergeTreeTableMetadata::parse(entry.metadata_str); - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this).checkAndFindDiff(metadata_from_entry); MergeTreeData::DataParts parts; @@ -3183,7 +3182,6 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer requests.emplace_back(zkutil::makeSetRequest(replica_path + "/columns", entry.columns_str, -1)); requests.emplace_back(zkutil::makeSetRequest(replica_path + "/metadata", entry.metadata_str, -1)); - zookeeper->multi(requests); { @@ -3192,6 +3190,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this).checkAndFindDiff(metadata_from_entry); setTableStructure(std::move(columns_from_entry), metadata_diff); metadata_version = entry.alter_version; From ea7d0093c14d39321e6b399e39e02bc24c78225d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 07:41:17 +0300 Subject: [PATCH 182/215] Fixed race condition in test --- .../0_stateless/00738_lock_for_inner_table.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/dbms/tests/queries/0_stateless/00738_lock_for_inner_table.sh index 9e20c9469a3..467414a3ac5 100755 --- a/dbms/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/dbms/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -9,17 +9,24 @@ DROP TABLE IF EXISTS mv; CREATE TABLE tab_00738(a Int) ENGINE = Log; CREATE MATERIALIZED VIEW mv ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n -${CLICKHOUSE_CLIENT} --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & +${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & function 
drop() { - sleep 0.1 ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner.mv\`" -n } +function wait_for_query_to_start() +{ + while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; do sleep 0.001; done +} + +export -f wait_for_query_to_start +timeout 5 bash -c wait_for_query_to_start + drop & wait echo "DROP TABLE IF EXISTS tab_00738; -DROP TABLE IF EXISTS mv;" | ${CLICKHOUSE_CLIENT} -n +DROP TABLE IF EXISTS mv;" | ${CLICKHOUSE_CLIENT} -n From d4a724dcc7eeaf6c68ef4fb46ead9e259ba384b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 15:44:24 +0300 Subject: [PATCH 183/215] Fixed build --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e9e21daeb8a..48ba57d0c72 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2904,13 +2904,17 @@ void StorageReplicatedMergeTree::startup() /// If we don't separate create/start steps, race condition will happen /// between the assignment of queue_task_handle and queueTask that use the queue_task_handle. 
- queue_task_handle = global_context.getBackgroundPool().createTask([this] { return queueTask(); }); - queue_task_handle->startTask(); + { + auto & pool = global_context.getBackgroundPool(); + queue_task_handle = pool.createTask([this] { return queueTask(); }); + pool.startTask(queue_task_handle); + } if (areBackgroundMovesNeeded()) { - move_parts_task_handle = global_context.getBackgroundMovePool().createTask([this] { return movePartsTask(); }); - move_parts_task_handle->startTask(); + auto & pool = global_context.getBackgroundMovePool(); + move_parts_task_handle = pool.createTask([this] { return movePartsTask(); }); + pool.startTask(move_parts_task_handle); } } From c658d85aa831a908d930d4a580c73bbbfdee2579 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:04:47 +0300 Subject: [PATCH 184/215] Whitespaces --- dbms/src/Processors/LimitTransform.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Processors/LimitTransform.h b/dbms/src/Processors/LimitTransform.h index c308c88db25..f37bf8aba15 100644 --- a/dbms/src/Processors/LimitTransform.h +++ b/dbms/src/Processors/LimitTransform.h @@ -12,7 +12,6 @@ private: InputPort & input; OutputPort & output; - size_t limit; size_t offset; size_t rows_read = 0; /// including the last read block From cfe2464e29d315ea409b50d2bad8449f9cd29e97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:32:04 +0300 Subject: [PATCH 185/215] Fixed errors --- dbms/src/Storages/StorageGenerateRandom.cpp | 59 +++++++++++++++------ 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index 15634a9ae76..6f98dc1746c 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -49,7 +49,7 @@ void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64_fast & /// The loop can be further optimized. 
UInt64 number = rng(); unalignedStore(data, number); - data += sizeof(UInt64); /// We assume that data has 15-byte padding (see PaddedPODArray) + data += sizeof(UInt64); /// We assume that data has at least 7-byte padding (see PaddedPODArray) } } @@ -63,23 +63,50 @@ ColumnPtr fillColumnWithRandomData( { case TypeIndex::String: { - auto size_column = ColumnUInt32::create(); - auto & sizes = size_column->getData(); + /// Mostly the same as the implementation of randomPrintableASCII function. - sizes.resize(limit); - for (UInt64 i = 0; i < limit; ++i) - sizes[i] = static_cast(rng()) % max_string_length; /// Slow + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + offsets_to.resize(limit); - ColumnWithTypeAndName argument{std::move(size_column), std::make_shared(), "size"}; - - Block block + IColumn::Offset offset = 0; + for (size_t row_num = 0; row_num < limit; ++row_num) { - argument, - {nullptr, type, "result"} - }; + size_t length = rng() % (max_string_length + 1); /// Slow - FunctionFactory::instance().get("randomPrintableASCII", context)->build({argument})->execute(block, {0}, 1, limit); - return block.getByPosition(1).column; + IColumn::Offset next_offset = offset + length + 1; + data_to.resize(next_offset); + offsets_to[row_num] = next_offset; + + auto * data_to_ptr = data_to.data(); /// avoid assert on array indexing after end + for (size_t pos = offset, end = offset + length; pos < end; pos += 4) /// We have padding in column buffers that we can overwrite. + { + UInt64 rand = rng(); + + UInt16 rand1 = rand; + UInt16 rand2 = rand >> 16; + UInt16 rand3 = rand >> 32; + UInt16 rand4 = rand >> 48; + + /// Printable characters are from range [32; 126]. 
+ /// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + + data_to_ptr[pos + 0] = 32 + ((rand1 * 95) >> 16); + data_to_ptr[pos + 1] = 32 + ((rand2 * 95) >> 16); + data_to_ptr[pos + 2] = 32 + ((rand3 * 95) >> 16); + data_to_ptr[pos + 3] = 32 + ((rand4 * 95) >> 16); + + /// NOTE gcc failed to vectorize this code (aliasing of char?) + /// TODO Implement SIMD optimizations from Danila Kutenin. + } + + data_to[offset + length] = 0; + + offset = next_offset; + } + + return column; } case TypeIndex::Enum8: @@ -129,7 +156,7 @@ ColumnPtr fillColumnWithRandomData( offsets.resize(limit); for (UInt64 i = 0; i < limit; ++i) { - offset += static_cast(rng()) % max_array_length; + offset += static_cast(rng()) % (max_array_length + 1); offsets[i] = offset; } @@ -252,7 +279,7 @@ ColumnPtr fillColumnWithRandomData( fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal32), rng); return column; } - case TypeIndex::Decimal64: + case TypeIndex::Decimal64: /// TODO Decimal may be generated out of range. 
{ auto column = type->createColumn(); auto & column_concrete = typeid_cast &>(*column); From 7c791e1cf10dd503e426e9c0142b2f6da7b7fecf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:32:36 +0300 Subject: [PATCH 186/215] Remove infinite performance tests --- dbms/tests/performance/array_fill.xml | 2 +- dbms/tests/performance/base64_hits.xml | 2 +- dbms/tests/performance/bloom_filter.xml | 2 +- .../performance/mingroupby-orderbylimit1.xml | 2 +- dbms/tests/performance/sum_map.xml | 2 +- dbms/tests/performance/system_numbers.xml | 60 +++++++++---------- 6 files changed, 34 insertions(+), 36 deletions(-) diff --git a/dbms/tests/performance/array_fill.xml b/dbms/tests/performance/array_fill.xml index 25ed745158b..51204299fad 100644 --- a/dbms/tests/performance/array_fill.xml +++ b/dbms/tests/performance/array_fill.xml @@ -1,5 +1,5 @@ - once + loop diff --git a/dbms/tests/performance/base64_hits.xml b/dbms/tests/performance/base64_hits.xml index edf4321fa07..1a1be4842bc 100644 --- a/dbms/tests/performance/base64_hits.xml +++ b/dbms/tests/performance/base64_hits.xml @@ -1,5 +1,5 @@ - once + loop hits_100m_single diff --git a/dbms/tests/performance/bloom_filter.xml b/dbms/tests/performance/bloom_filter.xml index 073b3c722be..ef35af9965d 100644 --- a/dbms/tests/performance/bloom_filter.xml +++ b/dbms/tests/performance/bloom_filter.xml @@ -1,5 +1,5 @@ - once + loop diff --git a/dbms/tests/performance/mingroupby-orderbylimit1.xml b/dbms/tests/performance/mingroupby-orderbylimit1.xml index 85735e1ef25..ec69ffa2b8f 100644 --- a/dbms/tests/performance/mingroupby-orderbylimit1.xml +++ b/dbms/tests/performance/mingroupby-orderbylimit1.xml @@ -1,5 +1,5 @@ - once + loop diff --git a/dbms/tests/performance/sum_map.xml b/dbms/tests/performance/sum_map.xml index 69993ad58dc..ac1ccaae0fe 100644 --- a/dbms/tests/performance/sum_map.xml +++ b/dbms/tests/performance/sum_map.xml @@ -1,5 +1,5 @@ - once + loop diff --git a/dbms/tests/performance/system_numbers.xml 
b/dbms/tests/performance/system_numbers.xml index 296d6850250..94d8ef76fa1 100644 --- a/dbms/tests/performance/system_numbers.xml +++ b/dbms/tests/performance/system_numbers.xml @@ -3,8 +3,6 @@ - - 4000 12000 @@ -17,48 +15,48 @@ То есть, этот запрос представляет собой необычным образом написанный бесконечный цикл. Мы запускаем этот запрос и наблюдаем, с какой скоростью он выполняется. Через несколько секунд, когда скорость стабилизируется, прерываем выполнение. В качестве скорости выполнения запроса указывается количество обработанных исходных (прочитанных из таблицы) данных в единицу времени. -Например, в таблице system.numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. --> -SELECT count() FROM system.numbers WHERE NOT ignore(rand()) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(rand()) +Например, в таблице numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. 
--> +SELECT count() FROM numbers(100000000) WHERE NOT ignore(rand()) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(rand()) -SELECT count() FROM system.numbers WHERE NOT ignore(intHash64(number)) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash64(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash64(number)) -SELECT count() FROM system.numbers WHERE NOT ignore(intHash32(number)) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash32(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash32(number)) -SELECT count() FROM system.numbers WHERE NOT ignore(toString(number)) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(toString(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(toString(number)) -SELECT count() FROM system.numbers WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(reinterpretAsString(number)) -SELECT count() FROM system.numbers WHERE NOT ignore(number / 7) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(number / 7) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(number / 7) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number / 7) -SELECT count() FROM system.numbers WHERE NOT ignore(number % 7) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 7) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 7) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 7) -SELECT count() FROM system.numbers WHERE NOT ignore(number % 
34908756) -SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 34908756) +SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 34908756) +SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 34908756) -SELECT number % 1000 AS k, count() FROM system.numbers GROUP BY k -SELECT number % 1000 AS k, count() FROM system.numbers_mt GROUP BY k +SELECT number % 1000 AS k, count() FROM numbers(100000000) GROUP BY k +SELECT number % 1000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k -SELECT number % 100000 AS k, count() FROM system.numbers GROUP BY k -SELECT number % 100000 AS k, count() FROM system.numbers_mt GROUP BY k +SELECT number % 100000 AS k, count() FROM numbers(100000000) GROUP BY k +SELECT number % 100000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k -SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k -SELECT number % 1000000 AS k, count() FROM system.numbers_mt GROUP BY k +SELECT number % 1000000 AS k, count() FROM numbers(100000000) GROUP BY k +SELECT number % 1000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k -SELECT number % 10000000 AS k, count() FROM system.numbers GROUP BY k -SELECT number % 10000000 AS k, count() FROM system.numbers_mt GROUP BY k +SELECT number % 10000000 AS k, count() FROM numbers(100000000) GROUP BY k +SELECT number % 10000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k -SELECT number % 500000000 AS k, count() FROM system.numbers GROUP BY k -SELECT number % 500000000 AS k, count() FROM system.numbers_mt GROUP BY k +SELECT number % 500000000 AS k, count() FROM numbers(100000000) GROUP BY k +SELECT number % 500000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k - + -SELECT count() FROM system.numbers WHERE NOT ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000 +SELECT count() FROM numbers(100000000) WHERE NOT 
ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000 From 8fa05b080c239d942d36b48d18434c779dc97010 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:33:06 +0300 Subject: [PATCH 187/215] Renamed a test --- .../{system_numbers.xml => synthetic_hardware_benchmark.xml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/performance/{system_numbers.xml => synthetic_hardware_benchmark.xml} (100%) diff --git a/dbms/tests/performance/system_numbers.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml similarity index 100% rename from dbms/tests/performance/system_numbers.xml rename to dbms/tests/performance/synthetic_hardware_benchmark.xml From 1645ce8696a252896118feea0ef0891d07f7cd9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:41:16 +0300 Subject: [PATCH 188/215] Removed outdated info --- dbms/tests/performance/README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/tests/performance/README.md b/dbms/tests/performance/README.md index d436eb7bce3..a797b88a321 100644 --- a/dbms/tests/performance/README.md +++ b/dbms/tests/performance/README.md @@ -6,17 +6,9 @@ This directory contains `.xml`-files with performance tests for `clickhouse-perf First of all you should check existing tests don't cover your case. If there are no such tests than you should write your own. -There two types of performance tests: -* First is executed in loop, and have tag `loop` in config. -* Second one is executed only once and have tag `once` in config. +You have to specify `preconditions`. It contains table names. Only `hits_100m_single`, `hits_10m_single`, `test.hits` are available in CI. -Type `once` should be used only for endless queries. Even if your query really long (10 seconds+), it's better to choose `loop` test. 
- -After you have choosen type, you have to specify `preconditions`. It contains table names. Only `hits_100m_single`, `hits_10m_single`, `test.hits` are available in CI. - -The most important part of test is `stop_conditions`. For `loop` test you should always use `min_time_not_changing_for_ms` stop condition. For `once` test you can choose between `average_speed_not_changing_for_ms` and `max_speed_not_changing_for_ms`, but first is preferable. Also you should always specify `total_time_ms` metric. Endless tests will be ignored by CI. - -`loop` tests are always compared by `min_time` metric and `once` tests compared by `max_rows_per_second`. +The most important part of test is `stop_conditions`. Also you should always specify `total_time_ms` metric. Endless tests will be ignored by CI. You can use `substitions`, `create`, `fill` and `drop` queries to prepare test. You can find examples in this folder. From c30d1ba18b1fbdc6544906d4d2fb1152d4d52524 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:44:18 +0300 Subject: [PATCH 189/215] Addition to prev. 
revision --- dbms/tests/performance/IPv4.xml | 1 - dbms/tests/performance/IPv6.xml | 1 - dbms/tests/performance/agg_functions_min_max_any.xml | 1 - dbms/tests/performance/and_function.xml | 1 - dbms/tests/performance/arithmetic.xml | 1 - dbms/tests/performance/array_element.xml | 1 - dbms/tests/performance/array_fill.xml | 1 - dbms/tests/performance/array_join.xml | 1 - dbms/tests/performance/base64.xml | 1 - dbms/tests/performance/base64_hits.xml | 1 - dbms/tests/performance/basename.xml | 1 - dbms/tests/performance/bitCount.xml | 1 - dbms/tests/performance/bit_operations_fixed_string.xml | 1 - .../performance/bit_operations_fixed_string_numbers.xml | 1 - dbms/tests/performance/bloom_filter.xml | 1 - dbms/tests/performance/bounding_ratio.xml | 1 - dbms/tests/performance/cidr.xml | 1 - dbms/tests/performance/codecs_float_insert.xml | 1 - dbms/tests/performance/codecs_float_select.xml | 1 - dbms/tests/performance/codecs_int_insert.xml | 1 - dbms/tests/performance/codecs_int_select.xml | 1 - dbms/tests/performance/collations.xml | 1 - dbms/tests/performance/column_column_comparison.xml | 1 - dbms/tests/performance/columns_hashing.xml | 1 - dbms/tests/performance/complex_array_creation.xml | 1 - dbms/tests/performance/concat_hits.xml | 1 - dbms/tests/performance/conditional.xml | 1 - dbms/tests/performance/consistent_hashes.xml | 1 - dbms/tests/performance/constant_column_comparison.xml | 1 - dbms/tests/performance/constant_column_search.xml | 1 - dbms/tests/performance/count.xml | 1 - dbms/tests/performance/cpu_synthetic.xml | 1 - dbms/tests/performance/cryptographic_hashes.xml | 1 - dbms/tests/performance/date_parsing.xml | 1 - dbms/tests/performance/date_time.xml | 1 - dbms/tests/performance/date_time_64.xml | 1 - dbms/tests/performance/decimal_aggregates.xml | 1 - dbms/tests/performance/early_constant_folding.xml | 1 - dbms/tests/performance/empty_string_deserialization.xml | 1 - dbms/tests/performance/empty_string_serialization.xml | 1 - 
dbms/tests/performance/entropy.xml | 1 - dbms/tests/performance/first_significant_subdomain.xml | 1 - dbms/tests/performance/fixed_string16.xml | 1 - dbms/tests/performance/float_formatting.xml | 1 - dbms/tests/performance/float_parsing.xml | 1 - dbms/tests/performance/format_date_time.xml | 1 - dbms/tests/performance/functions_coding.xml | 1 - dbms/tests/performance/functions_geo.xml | 1 - dbms/tests/performance/general_purpose_hashes.xml | 1 - dbms/tests/performance/general_purpose_hashes_on_UUID.xml | 1 - dbms/tests/performance/generate_table_function.xml | 2 -- dbms/tests/performance/great_circle_dist.xml | 1 - dbms/tests/performance/group_array_moving_sum.xml | 1 - dbms/tests/performance/h3.xml | 1 - dbms/tests/performance/if_array_num.xml | 1 - dbms/tests/performance/if_array_string.xml | 1 - dbms/tests/performance/if_string_const.xml | 1 - dbms/tests/performance/if_string_hits.xml | 1 - dbms/tests/performance/if_to_multiif.xml | 1 - dbms/tests/performance/information_value.xml | 1 - dbms/tests/performance/insert_values_with_expressions.xml | 1 - dbms/tests/performance/inserts_arrays_lowcardinality.xml | 1 - dbms/tests/performance/int_parsing.xml | 1 - dbms/tests/performance/jit_large_requests.xml | 1 - dbms/tests/performance/jit_small_requests.xml | 1 - dbms/tests/performance/joins_in_memory.xml | 1 - dbms/tests/performance/joins_in_memory_pmj.xml | 1 - dbms/tests/performance/json_extract_rapidjson.xml | 1 - dbms/tests/performance/json_extract_simdjson.xml | 1 - dbms/tests/performance/leftpad.xml | 1 - dbms/tests/performance/linear_regression.xml | 1 - dbms/tests/performance/logical_functions_large.xml | 1 - dbms/tests/performance/logical_functions_medium.xml | 1 - dbms/tests/performance/logical_functions_small.xml | 1 - dbms/tests/performance/math.xml | 1 - dbms/tests/performance/merge_table_streams.xml | 1 - dbms/tests/performance/merge_tree_huge_pk.xml | 1 - dbms/tests/performance/merge_tree_many_partitions.xml | 1 - 
dbms/tests/performance/merge_tree_many_partitions_2.xml | 1 - dbms/tests/performance/merge_tree_simple_select.xml | 1 - dbms/tests/performance/mingroupby-orderbylimit1.xml | 1 - dbms/tests/performance/modulo.xml | 1 - dbms/tests/performance/ngram_distance.xml | 1 - dbms/tests/performance/number_formatting_formats.xml | 1 - dbms/tests/performance/nyc_taxi.xml | 1 - dbms/tests/performance/order_by_decimals.xml | 1 - dbms/tests/performance/order_by_read_in_order.xml | 1 - dbms/tests/performance/order_by_single_column.xml | 1 - dbms/tests/performance/parallel_insert.xml | 1 - dbms/tests/performance/parse_engine_file.xml | 1 - dbms/tests/performance/prewhere.xml | 1 - dbms/tests/performance/random_printable_ascii.xml | 1 - dbms/tests/performance/range.xml | 1 - dbms/tests/performance/read_hits_with_aio.xml | 1 - dbms/tests/performance/right.xml | 1 - dbms/tests/performance/round_down.xml | 1 - dbms/tests/performance/round_methods.xml | 1 - dbms/tests/performance/scalar.xml | 1 - dbms/tests/performance/select_format.xml | 1 - dbms/tests/performance/set.xml | 1 - dbms/tests/performance/set_hits.xml | 1 - dbms/tests/performance/set_index.xml | 1 - dbms/tests/performance/simple_join_query.xml | 1 - dbms/tests/performance/slices_hits.xml | 1 - dbms/tests/performance/sort.xml | 1 - dbms/tests/performance/string_join.xml | 1 - dbms/tests/performance/string_set.xml | 1 - dbms/tests/performance/string_sort.xml | 1 - dbms/tests/performance/sum_map.xml | 1 - dbms/tests/performance/synthetic_hardware_benchmark.xml | 1 - dbms/tests/performance/trim_numbers.xml | 2 -- dbms/tests/performance/trim_urls.xml | 2 -- dbms/tests/performance/trim_whitespace.xml | 1 - dbms/tests/performance/uniq.xml | 1 - dbms/tests/performance/url_hits.xml | 2 -- .../tests/performance/vectorize_aggregation_combinators.xml | 2 -- dbms/tests/performance/visit_param_extract_raw.xml | 6 +----- dbms/tests/performance/website.xml | 1 - 118 files changed, 1 insertion(+), 127 deletions(-) diff --git 
a/dbms/tests/performance/IPv4.xml b/dbms/tests/performance/IPv4.xml index 41e519e0b9d..8f5b61d70c9 100644 --- a/dbms/tests/performance/IPv4.xml +++ b/dbms/tests/performance/IPv4.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/IPv6.xml b/dbms/tests/performance/IPv6.xml index 1c7c9d679ab..272d54c851b 100644 --- a/dbms/tests/performance/IPv6.xml +++ b/dbms/tests/performance/IPv6.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/agg_functions_min_max_any.xml b/dbms/tests/performance/agg_functions_min_max_any.xml index 8a132bb79a9..c63d0098262 100644 --- a/dbms/tests/performance/agg_functions_min_max_any.xml +++ b/dbms/tests/performance/agg_functions_min_max_any.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/and_function.xml b/dbms/tests/performance/and_function.xml index da9a3d271ab..1fb9dfa53f9 100644 --- a/dbms/tests/performance/and_function.xml +++ b/dbms/tests/performance/and_function.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/arithmetic.xml b/dbms/tests/performance/arithmetic.xml index 348273bebbf..3af85a260f3 100644 --- a/dbms/tests/performance/arithmetic.xml +++ b/dbms/tests/performance/arithmetic.xml @@ -1,5 +1,4 @@ - loop 10 diff --git a/dbms/tests/performance/array_element.xml b/dbms/tests/performance/array_element.xml index f4a33810fdd..4cc9bdc4a38 100644 --- a/dbms/tests/performance/array_element.xml +++ b/dbms/tests/performance/array_element.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/array_fill.xml b/dbms/tests/performance/array_fill.xml index 51204299fad..ccd2c5eba7c 100644 --- a/dbms/tests/performance/array_fill.xml +++ b/dbms/tests/performance/array_fill.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/array_join.xml b/dbms/tests/performance/array_join.xml index e5025695d15..c95681d4d29 100644 --- a/dbms/tests/performance/array_join.xml +++ b/dbms/tests/performance/array_join.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/base64.xml 
b/dbms/tests/performance/base64.xml index 232b4d20ba3..c25694841f1 100644 --- a/dbms/tests/performance/base64.xml +++ b/dbms/tests/performance/base64.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/base64_hits.xml b/dbms/tests/performance/base64_hits.xml index 1a1be4842bc..d585408fff8 100644 --- a/dbms/tests/performance/base64_hits.xml +++ b/dbms/tests/performance/base64_hits.xml @@ -1,5 +1,4 @@ - loop hits_100m_single diff --git a/dbms/tests/performance/basename.xml b/dbms/tests/performance/basename.xml index 6af67bc94c4..691fc38b8ca 100644 --- a/dbms/tests/performance/basename.xml +++ b/dbms/tests/performance/basename.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/bitCount.xml b/dbms/tests/performance/bitCount.xml index 60901885dbd..34fdb24c10b 100644 --- a/dbms/tests/performance/bitCount.xml +++ b/dbms/tests/performance/bitCount.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/bit_operations_fixed_string.xml b/dbms/tests/performance/bit_operations_fixed_string.xml index 53071b94d49..90df91f1025 100644 --- a/dbms/tests/performance/bit_operations_fixed_string.xml +++ b/dbms/tests/performance/bit_operations_fixed_string.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/bit_operations_fixed_string_numbers.xml b/dbms/tests/performance/bit_operations_fixed_string_numbers.xml index 2ba3ffb5d86..779aea19cdc 100644 --- a/dbms/tests/performance/bit_operations_fixed_string_numbers.xml +++ b/dbms/tests/performance/bit_operations_fixed_string_numbers.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/bloom_filter.xml b/dbms/tests/performance/bloom_filter.xml index ef35af9965d..079b7a43da3 100644 --- a/dbms/tests/performance/bloom_filter.xml +++ b/dbms/tests/performance/bloom_filter.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/bounding_ratio.xml b/dbms/tests/performance/bounding_ratio.xml index 4bf50f57290..0d0adfaea45 100644 --- a/dbms/tests/performance/bounding_ratio.xml +++ 
b/dbms/tests/performance/bounding_ratio.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/cidr.xml b/dbms/tests/performance/cidr.xml index 1ca7f691881..938734e3709 100644 --- a/dbms/tests/performance/cidr.xml +++ b/dbms/tests/performance/cidr.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/codecs_float_insert.xml b/dbms/tests/performance/codecs_float_insert.xml index 1bbbf6b2d92..706a2f3c0a0 100644 --- a/dbms/tests/performance/codecs_float_insert.xml +++ b/dbms/tests/performance/codecs_float_insert.xml @@ -1,6 +1,5 @@ - loop 10 diff --git a/dbms/tests/performance/codecs_float_select.xml b/dbms/tests/performance/codecs_float_select.xml index 1d3957c8da9..4c2f671a90e 100644 --- a/dbms/tests/performance/codecs_float_select.xml +++ b/dbms/tests/performance/codecs_float_select.xml @@ -1,6 +1,5 @@ - loop 10 diff --git a/dbms/tests/performance/codecs_int_insert.xml b/dbms/tests/performance/codecs_int_insert.xml index eea263e601a..1226d9020a0 100644 --- a/dbms/tests/performance/codecs_int_insert.xml +++ b/dbms/tests/performance/codecs_int_insert.xml @@ -1,6 +1,5 @@ - loop 10 diff --git a/dbms/tests/performance/codecs_int_select.xml b/dbms/tests/performance/codecs_int_select.xml index 40ebfd4d000..8054c2b2de4 100644 --- a/dbms/tests/performance/codecs_int_select.xml +++ b/dbms/tests/performance/codecs_int_select.xml @@ -1,6 +1,5 @@ - loop 10 diff --git a/dbms/tests/performance/collations.xml b/dbms/tests/performance/collations.xml index 1bec38dd103..03d77fa5e27 100644 --- a/dbms/tests/performance/collations.xml +++ b/dbms/tests/performance/collations.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/column_column_comparison.xml b/dbms/tests/performance/column_column_comparison.xml index 9d4446d7c2d..7559e03e506 100644 --- a/dbms/tests/performance/column_column_comparison.xml +++ b/dbms/tests/performance/column_column_comparison.xml @@ -7,7 +7,6 @@ hits_100m_single - loop diff --git a/dbms/tests/performance/columns_hashing.xml 
b/dbms/tests/performance/columns_hashing.xml index 138855dae89..ca330b0d435 100644 --- a/dbms/tests/performance/columns_hashing.xml +++ b/dbms/tests/performance/columns_hashing.xml @@ -8,7 +8,6 @@ hits_1000m_single - loop diff --git a/dbms/tests/performance/complex_array_creation.xml b/dbms/tests/performance/complex_array_creation.xml index 76e4910a1d7..abcea2671e7 100644 --- a/dbms/tests/performance/complex_array_creation.xml +++ b/dbms/tests/performance/complex_array_creation.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/concat_hits.xml b/dbms/tests/performance/concat_hits.xml index e2c6fc23c08..49ab27bf540 100644 --- a/dbms/tests/performance/concat_hits.xml +++ b/dbms/tests/performance/concat_hits.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/conditional.xml b/dbms/tests/performance/conditional.xml index eea43d6556a..2a4f4ccad6e 100644 --- a/dbms/tests/performance/conditional.xml +++ b/dbms/tests/performance/conditional.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes.xml index 5929c6388d5..349f2c3b05f 100644 --- a/dbms/tests/performance/consistent_hashes.xml +++ b/dbms/tests/performance/consistent_hashes.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/constant_column_comparison.xml b/dbms/tests/performance/constant_column_comparison.xml index f32ed444a0c..34e6193675c 100644 --- a/dbms/tests/performance/constant_column_comparison.xml +++ b/dbms/tests/performance/constant_column_comparison.xml @@ -7,7 +7,6 @@ hits_100m_single - loop diff --git a/dbms/tests/performance/constant_column_search.xml b/dbms/tests/performance/constant_column_search.xml index 9953c2797a2..0c19624f5bc 100644 --- a/dbms/tests/performance/constant_column_search.xml +++ b/dbms/tests/performance/constant_column_search.xml @@ -7,7 +7,6 @@ hits_100m_single - loop diff --git a/dbms/tests/performance/count.xml b/dbms/tests/performance/count.xml index 
3bb4a0d2cd5..0d8470bbf3a 100644 --- a/dbms/tests/performance/count.xml +++ b/dbms/tests/performance/count.xml @@ -1,6 +1,5 @@ - loop diff --git a/dbms/tests/performance/cpu_synthetic.xml b/dbms/tests/performance/cpu_synthetic.xml index dd7ac14ccf8..762115756de 100644 --- a/dbms/tests/performance/cpu_synthetic.xml +++ b/dbms/tests/performance/cpu_synthetic.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/cryptographic_hashes.xml b/dbms/tests/performance/cryptographic_hashes.xml index 7bafb25f299..6dcc05d17fb 100644 --- a/dbms/tests/performance/cryptographic_hashes.xml +++ b/dbms/tests/performance/cryptographic_hashes.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/date_parsing.xml b/dbms/tests/performance/date_parsing.xml index 8ecf3681804..f2e3954948e 100644 --- a/dbms/tests/performance/date_parsing.xml +++ b/dbms/tests/performance/date_parsing.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/date_time.xml b/dbms/tests/performance/date_time.xml index 4e9cd2c4abd..858b20d5784 100644 --- a/dbms/tests/performance/date_time.xml +++ b/dbms/tests/performance/date_time.xml @@ -1,6 +1,5 @@ - loop long diff --git a/dbms/tests/performance/date_time_64.xml b/dbms/tests/performance/date_time_64.xml index b345550b335..ccce5b54ecc 100644 --- a/dbms/tests/performance/date_time_64.xml +++ b/dbms/tests/performance/date_time_64.xml @@ -1,5 +1,4 @@ - loop default.hits_100m_single diff --git a/dbms/tests/performance/decimal_aggregates.xml b/dbms/tests/performance/decimal_aggregates.xml index 86830fedce6..c6639ea1053 100644 --- a/dbms/tests/performance/decimal_aggregates.xml +++ b/dbms/tests/performance/decimal_aggregates.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory INSERT INTO t SELECT number AS x, x AS d32, x AS d64, x d128 FROM numbers(1000000) diff --git a/dbms/tests/performance/early_constant_folding.xml b/dbms/tests/performance/early_constant_folding.xml 
index ad2d1619eb9..2cc68a7b8c6 100644 --- a/dbms/tests/performance/early_constant_folding.xml +++ b/dbms/tests/performance/early_constant_folding.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/empty_string_deserialization.xml b/dbms/tests/performance/empty_string_deserialization.xml index c56f67ab274..d4b4f338a85 100644 --- a/dbms/tests/performance/empty_string_deserialization.xml +++ b/dbms/tests/performance/empty_string_deserialization.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/empty_string_serialization.xml b/dbms/tests/performance/empty_string_serialization.xml index 46c4bc0275c..62b2e13f0af 100644 --- a/dbms/tests/performance/empty_string_serialization.xml +++ b/dbms/tests/performance/empty_string_serialization.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/entropy.xml b/dbms/tests/performance/entropy.xml index 45c9ccb840d..e5a96be165c 100644 --- a/dbms/tests/performance/entropy.xml +++ b/dbms/tests/performance/entropy.xml @@ -1,5 +1,4 @@ - loop test.hits diff --git a/dbms/tests/performance/first_significant_subdomain.xml b/dbms/tests/performance/first_significant_subdomain.xml index 705e70b86f9..d8ef4b01e54 100644 --- a/dbms/tests/performance/first_significant_subdomain.xml +++ b/dbms/tests/performance/first_significant_subdomain.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/fixed_string16.xml b/dbms/tests/performance/fixed_string16.xml index 398f09aba3d..6dd86ea7479 100644 --- a/dbms/tests/performance/fixed_string16.xml +++ b/dbms/tests/performance/fixed_string16.xml @@ -7,7 +7,6 @@ test.hits - loop diff --git a/dbms/tests/performance/float_formatting.xml b/dbms/tests/performance/float_formatting.xml index aaf2fad0c93..941e1bcaaa8 100644 --- a/dbms/tests/performance/float_formatting.xml +++ b/dbms/tests/performance/float_formatting.xml @@ -1,5 +1,4 @@ - loop long diff --git a/dbms/tests/performance/float_parsing.xml b/dbms/tests/performance/float_parsing.xml index e7779751fa4..f75de81c698 
100644 --- a/dbms/tests/performance/float_parsing.xml +++ b/dbms/tests/performance/float_parsing.xml @@ -1,5 +1,4 @@ - loop long diff --git a/dbms/tests/performance/format_date_time.xml b/dbms/tests/performance/format_date_time.xml index aa070c40ec5..aed8fd7dc77 100644 --- a/dbms/tests/performance/format_date_time.xml +++ b/dbms/tests/performance/format_date_time.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/functions_coding.xml b/dbms/tests/performance/functions_coding.xml index 93e16a8a221..52ada24b5bc 100644 --- a/dbms/tests/performance/functions_coding.xml +++ b/dbms/tests/performance/functions_coding.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/functions_geo.xml b/dbms/tests/performance/functions_geo.xml index a4233b2fe57..207d39c52b7 100644 --- a/dbms/tests/performance/functions_geo.xml +++ b/dbms/tests/performance/functions_geo.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/general_purpose_hashes.xml b/dbms/tests/performance/general_purpose_hashes.xml index 94c8d5d4b2b..cc40c7fe1e3 100644 --- a/dbms/tests/performance/general_purpose_hashes.xml +++ b/dbms/tests/performance/general_purpose_hashes.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/general_purpose_hashes_on_UUID.xml b/dbms/tests/performance/general_purpose_hashes_on_UUID.xml index 964879148d0..3cb14e4c87c 100644 --- a/dbms/tests/performance/general_purpose_hashes_on_UUID.xml +++ b/dbms/tests/performance/general_purpose_hashes_on_UUID.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml index 4674b81af99..48b9b22df1c 100644 --- a/dbms/tests/performance/generate_table_function.xml +++ b/dbms/tests/performance/generate_table_function.xml @@ -1,9 +1,7 @@ - loop - 4000 10000 diff --git a/dbms/tests/performance/great_circle_dist.xml b/dbms/tests/performance/great_circle_dist.xml index 3edfc2c8008..3b88d00eb63 100644 --- 
a/dbms/tests/performance/great_circle_dist.xml +++ b/dbms/tests/performance/great_circle_dist.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/group_array_moving_sum.xml b/dbms/tests/performance/group_array_moving_sum.xml index 6939989c5b4..6da1752e1f6 100644 --- a/dbms/tests/performance/group_array_moving_sum.xml +++ b/dbms/tests/performance/group_array_moving_sum.xml @@ -1,6 +1,5 @@ - loop diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml index 3a6d5940d0d..a09ac88f727 100644 --- a/dbms/tests/performance/h3.xml +++ b/dbms/tests/performance/h3.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/if_array_num.xml b/dbms/tests/performance/if_array_num.xml index d4c9c29dd99..4ae4db3afdc 100644 --- a/dbms/tests/performance/if_array_num.xml +++ b/dbms/tests/performance/if_array_num.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/if_array_string.xml b/dbms/tests/performance/if_array_string.xml index 235051fc905..95dfb809230 100644 --- a/dbms/tests/performance/if_array_string.xml +++ b/dbms/tests/performance/if_array_string.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/if_string_const.xml b/dbms/tests/performance/if_string_const.xml index 5b06440473f..5ab8455c948 100644 --- a/dbms/tests/performance/if_string_const.xml +++ b/dbms/tests/performance/if_string_const.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/if_string_hits.xml b/dbms/tests/performance/if_string_hits.xml index 267c8b039e5..ec9ea39f7cf 100644 --- a/dbms/tests/performance/if_string_hits.xml +++ b/dbms/tests/performance/if_string_hits.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/if_to_multiif.xml b/dbms/tests/performance/if_to_multiif.xml index e1c45bbb69e..373318c316c 100644 --- a/dbms/tests/performance/if_to_multiif.xml +++ b/dbms/tests/performance/if_to_multiif.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/information_value.xml b/dbms/tests/performance/information_value.xml 
index f5b73a18abc..6f94d828eb9 100644 --- a/dbms/tests/performance/information_value.xml +++ b/dbms/tests/performance/information_value.xml @@ -1,5 +1,4 @@ - loop hits_100m_single diff --git a/dbms/tests/performance/insert_values_with_expressions.xml b/dbms/tests/performance/insert_values_with_expressions.xml index 66fe2aef18b..4464066c16e 100644 --- a/dbms/tests/performance/insert_values_with_expressions.xml +++ b/dbms/tests/performance/insert_values_with_expressions.xml @@ -1,5 +1,4 @@ - loop 1 diff --git a/dbms/tests/performance/inserts_arrays_lowcardinality.xml b/dbms/tests/performance/inserts_arrays_lowcardinality.xml index a453cfb07f8..bca5c858576 100644 --- a/dbms/tests/performance/inserts_arrays_lowcardinality.xml +++ b/dbms/tests/performance/inserts_arrays_lowcardinality.xml @@ -1,5 +1,4 @@ - loop 5 diff --git a/dbms/tests/performance/int_parsing.xml b/dbms/tests/performance/int_parsing.xml index 51f740523ba..8a6475546bf 100644 --- a/dbms/tests/performance/int_parsing.xml +++ b/dbms/tests/performance/int_parsing.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/jit_large_requests.xml b/dbms/tests/performance/jit_large_requests.xml index 54aa2af65b1..03ef588ca87 100644 --- a/dbms/tests/performance/jit_large_requests.xml +++ b/dbms/tests/performance/jit_large_requests.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/jit_small_requests.xml b/dbms/tests/performance/jit_small_requests.xml index d65e14cb97e..edf9311eb05 100644 --- a/dbms/tests/performance/jit_small_requests.xml +++ b/dbms/tests/performance/jit_small_requests.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/joins_in_memory.xml b/dbms/tests/performance/joins_in_memory.xml index f624030d7d4..1d3b14ae962 100644 --- a/dbms/tests/performance/joins_in_memory.xml +++ b/dbms/tests/performance/joins_in_memory.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/joins_in_memory_pmj.xml b/dbms/tests/performance/joins_in_memory_pmj.xml index 
0352268c846..19383467fa1 100644 --- a/dbms/tests/performance/joins_in_memory_pmj.xml +++ b/dbms/tests/performance/joins_in_memory_pmj.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/json_extract_rapidjson.xml b/dbms/tests/performance/json_extract_rapidjson.xml index 8a2718d4a56..42b89456c9c 100644 --- a/dbms/tests/performance/json_extract_rapidjson.xml +++ b/dbms/tests/performance/json_extract_rapidjson.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/json_extract_simdjson.xml b/dbms/tests/performance/json_extract_simdjson.xml index f3a38912b0f..1e0c992802e 100644 --- a/dbms/tests/performance/json_extract_simdjson.xml +++ b/dbms/tests/performance/json_extract_simdjson.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/leftpad.xml b/dbms/tests/performance/leftpad.xml index a0717adbbd8..eb0b09c72ed 100644 --- a/dbms/tests/performance/leftpad.xml +++ b/dbms/tests/performance/leftpad.xml @@ -8,7 +8,6 @@ hashfile - loop diff --git a/dbms/tests/performance/linear_regression.xml b/dbms/tests/performance/linear_regression.xml index c358e21af05..87fa034d851 100644 --- a/dbms/tests/performance/linear_regression.xml +++ b/dbms/tests/performance/linear_regression.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/logical_functions_large.xml b/dbms/tests/performance/logical_functions_large.xml index b90023c3f60..a87b41ec916 100644 --- a/dbms/tests/performance/logical_functions_large.xml +++ b/dbms/tests/performance/logical_functions_large.xml @@ -1,6 +1,5 @@ 1 - loop diff --git a/dbms/tests/performance/logical_functions_medium.xml b/dbms/tests/performance/logical_functions_medium.xml index 0f6de7ea23e..087917040d9 100644 --- a/dbms/tests/performance/logical_functions_medium.xml +++ b/dbms/tests/performance/logical_functions_medium.xml @@ -1,6 +1,5 @@ 1 - loop diff --git a/dbms/tests/performance/logical_functions_small.xml b/dbms/tests/performance/logical_functions_small.xml index a2c90346ed6..ed6ab2afde6 100644 --- 
a/dbms/tests/performance/logical_functions_small.xml +++ b/dbms/tests/performance/logical_functions_small.xml @@ -1,6 +1,5 @@ 1 - loop diff --git a/dbms/tests/performance/math.xml b/dbms/tests/performance/math.xml index 280f0821964..6ab497749f1 100644 --- a/dbms/tests/performance/math.xml +++ b/dbms/tests/performance/math.xml @@ -1,6 +1,5 @@ - loop diff --git a/dbms/tests/performance/merge_table_streams.xml b/dbms/tests/performance/merge_table_streams.xml index f1816e85097..084fa2da575 100644 --- a/dbms/tests/performance/merge_table_streams.xml +++ b/dbms/tests/performance/merge_table_streams.xml @@ -1,5 +1,4 @@ - loop hits_100m_single diff --git a/dbms/tests/performance/merge_tree_huge_pk.xml b/dbms/tests/performance/merge_tree_huge_pk.xml index e39ff7501f7..1636fd52e2d 100644 --- a/dbms/tests/performance/merge_tree_huge_pk.xml +++ b/dbms/tests/performance/merge_tree_huge_pk.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/merge_tree_many_partitions.xml b/dbms/tests/performance/merge_tree_many_partitions.xml index 6eb110bfab9..33bb12ed22b 100644 --- a/dbms/tests/performance/merge_tree_many_partitions.xml +++ b/dbms/tests/performance/merge_tree_many_partitions.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE bad_partitions (x UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x INSERT INTO bad_partitions SELECT * FROM numbers(10000) diff --git a/dbms/tests/performance/merge_tree_many_partitions_2.xml b/dbms/tests/performance/merge_tree_many_partitions_2.xml index 35d158abf83..42bb0ac29c9 100644 --- a/dbms/tests/performance/merge_tree_many_partitions_2.xml +++ b/dbms/tests/performance/merge_tree_many_partitions_2.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE bad_partitions (a UInt64, b UInt64, c UInt64, d UInt64, e UInt64, f UInt64, g UInt64, h UInt64, i UInt64, j UInt64, k UInt64, l UInt64, m UInt64, n UInt64, o UInt64, p UInt64, q UInt64, r UInt64, s UInt64, t UInt64, u UInt64, v UInt64, w UInt64, x UInt64, y UInt64, z UInt64) ENGINE = MergeTree PARTITION BY x 
ORDER BY x INSERT INTO bad_partitions (x) SELECT * FROM numbers_mt(3000) diff --git a/dbms/tests/performance/merge_tree_simple_select.xml b/dbms/tests/performance/merge_tree_simple_select.xml index 5600e12a5db..f38a5241cb5 100644 --- a/dbms/tests/performance/merge_tree_simple_select.xml +++ b/dbms/tests/performance/merge_tree_simple_select.xml @@ -1,6 +1,5 @@ - loop diff --git a/dbms/tests/performance/mingroupby-orderbylimit1.xml b/dbms/tests/performance/mingroupby-orderbylimit1.xml index ec69ffa2b8f..34cd992558b 100644 --- a/dbms/tests/performance/mingroupby-orderbylimit1.xml +++ b/dbms/tests/performance/mingroupby-orderbylimit1.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/modulo.xml b/dbms/tests/performance/modulo.xml index 8e6674d0980..e31de5c1701 100644 --- a/dbms/tests/performance/modulo.xml +++ b/dbms/tests/performance/modulo.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/ngram_distance.xml b/dbms/tests/performance/ngram_distance.xml index 78da4d55d0e..e90f49155b1 100644 --- a/dbms/tests/performance/ngram_distance.xml +++ b/dbms/tests/performance/ngram_distance.xml @@ -8,7 +8,6 @@ hits_10m_single - loop 20000000000 diff --git a/dbms/tests/performance/number_formatting_formats.xml b/dbms/tests/performance/number_formatting_formats.xml index aa9929464fb..c2a9a9c081d 100644 --- a/dbms/tests/performance/number_formatting_formats.xml +++ b/dbms/tests/performance/number_formatting_formats.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`) diff --git a/dbms/tests/performance/nyc_taxi.xml b/dbms/tests/performance/nyc_taxi.xml index 7648e377433..92a1dd59441 100644 --- a/dbms/tests/performance/nyc_taxi.xml +++ b/dbms/tests/performance/nyc_taxi.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/order_by_decimals.xml b/dbms/tests/performance/order_by_decimals.xml index c6a7e7f72df..5479181fb08 100644 --- a/dbms/tests/performance/order_by_decimals.xml +++ 
b/dbms/tests/performance/order_by_decimals.xml @@ -4,7 +4,6 @@ comparison - loop diff --git a/dbms/tests/performance/order_by_read_in_order.xml b/dbms/tests/performance/order_by_read_in_order.xml index a99dd89846e..e37e4df4681 100644 --- a/dbms/tests/performance/order_by_read_in_order.xml +++ b/dbms/tests/performance/order_by_read_in_order.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/order_by_single_column.xml b/dbms/tests/performance/order_by_single_column.xml index ed247641ca8..148b14e8959 100644 --- a/dbms/tests/performance/order_by_single_column.xml +++ b/dbms/tests/performance/order_by_single_column.xml @@ -8,7 +8,6 @@ hits_100m_single - loop diff --git a/dbms/tests/performance/parallel_insert.xml b/dbms/tests/performance/parallel_insert.xml index 44a2964f881..6da1a2cc020 100644 --- a/dbms/tests/performance/parallel_insert.xml +++ b/dbms/tests/performance/parallel_insert.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/parse_engine_file.xml b/dbms/tests/performance/parse_engine_file.xml index 8a0054bdd7f..fb10fa97915 100644 --- a/dbms/tests/performance/parse_engine_file.xml +++ b/dbms/tests/performance/parse_engine_file.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}) AS test.hits diff --git a/dbms/tests/performance/prewhere.xml b/dbms/tests/performance/prewhere.xml index e4c9cc749ff..e3350d765ee 100644 --- a/dbms/tests/performance/prewhere.xml +++ b/dbms/tests/performance/prewhere.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/random_printable_ascii.xml b/dbms/tests/performance/random_printable_ascii.xml index 5fca705464e..320ffeac796 100644 --- a/dbms/tests/performance/random_printable_ascii.xml +++ b/dbms/tests/performance/random_printable_ascii.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/range.xml b/dbms/tests/performance/range.xml index ee61a22b0cf..95b8455057e 100644 --- a/dbms/tests/performance/range.xml +++ b/dbms/tests/performance/range.xml 
@@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/read_hits_with_aio.xml b/dbms/tests/performance/read_hits_with_aio.xml index 5fa3f70ed86..850fd0fbadc 100644 --- a/dbms/tests/performance/read_hits_with_aio.xml +++ b/dbms/tests/performance/read_hits_with_aio.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/right.xml b/dbms/tests/performance/right.xml index 55095d251f7..73030e52f21 100644 --- a/dbms/tests/performance/right.xml +++ b/dbms/tests/performance/right.xml @@ -1,5 +1,4 @@ - loop hits_100m_single diff --git a/dbms/tests/performance/round_down.xml b/dbms/tests/performance/round_down.xml index 880b625af28..353f169ae8d 100644 --- a/dbms/tests/performance/round_down.xml +++ b/dbms/tests/performance/round_down.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/round_methods.xml b/dbms/tests/performance/round_methods.xml index 54bd1e4af17..0e560b2eae6 100644 --- a/dbms/tests/performance/round_methods.xml +++ b/dbms/tests/performance/round_methods.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/scalar.xml b/dbms/tests/performance/scalar.xml index d1bc661c58f..e8e487a80da 100644 --- a/dbms/tests/performance/scalar.xml +++ b/dbms/tests/performance/scalar.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/select_format.xml b/dbms/tests/performance/select_format.xml index 189b35a2700..b8df874304f 100644 --- a/dbms/tests/performance/select_format.xml +++ b/dbms/tests/performance/select_format.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}, '/dev/null') AS test.hits diff --git a/dbms/tests/performance/set.xml b/dbms/tests/performance/set.xml index 75b87d38abe..8d50dbbce23 100644 --- a/dbms/tests/performance/set.xml +++ b/dbms/tests/performance/set.xml @@ -1,5 +1,4 @@ - loop long diff --git a/dbms/tests/performance/set_hits.xml b/dbms/tests/performance/set_hits.xml index a5e61625604..09860aa1cd7 100644 --- a/dbms/tests/performance/set_hits.xml +++ 
b/dbms/tests/performance/set_hits.xml @@ -1,5 +1,4 @@ - loop hits_10m_single diff --git a/dbms/tests/performance/set_index.xml b/dbms/tests/performance/set_index.xml index 4e24b7ccd79..1d1b2460e85 100644 --- a/dbms/tests/performance/set_index.xml +++ b/dbms/tests/performance/set_index.xml @@ -1,5 +1,4 @@ - loop CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a INSERT INTO test_in SELECT number FROM numbers(500000000) diff --git a/dbms/tests/performance/simple_join_query.xml b/dbms/tests/performance/simple_join_query.xml index 8ef3d97460d..8f62ffdfd00 100644 --- a/dbms/tests/performance/simple_join_query.xml +++ b/dbms/tests/performance/simple_join_query.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/slices_hits.xml b/dbms/tests/performance/slices_hits.xml index ad01a607b8a..1745df3328c 100644 --- a/dbms/tests/performance/slices_hits.xml +++ b/dbms/tests/performance/slices_hits.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/sort.xml b/dbms/tests/performance/sort.xml index 3aa7ed5788f..da7f2fd5410 100644 --- a/dbms/tests/performance/sort.xml +++ b/dbms/tests/performance/sort.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/string_join.xml b/dbms/tests/performance/string_join.xml index 228fe3182b8..3988845641c 100644 --- a/dbms/tests/performance/string_join.xml +++ b/dbms/tests/performance/string_join.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/string_set.xml b/dbms/tests/performance/string_set.xml index cf6261d6d60..95612fb2d34 100644 --- a/dbms/tests/performance/string_set.xml +++ b/dbms/tests/performance/string_set.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/string_sort.xml b/dbms/tests/performance/string_sort.xml index b72b073172a..6a4e68270f9 100644 --- a/dbms/tests/performance/string_sort.xml +++ b/dbms/tests/performance/string_sort.xml @@ -4,7 +4,6 @@ hits_100m_single - loop diff --git a/dbms/tests/performance/sum_map.xml b/dbms/tests/performance/sum_map.xml 
index ac1ccaae0fe..a88983fdbea 100644 --- a/dbms/tests/performance/sum_map.xml +++ b/dbms/tests/performance/sum_map.xml @@ -1,5 +1,4 @@ - loop diff --git a/dbms/tests/performance/synthetic_hardware_benchmark.xml b/dbms/tests/performance/synthetic_hardware_benchmark.xml index 94d8ef76fa1..055f8f67ee5 100644 --- a/dbms/tests/performance/synthetic_hardware_benchmark.xml +++ b/dbms/tests/performance/synthetic_hardware_benchmark.xml @@ -1,5 +1,4 @@ - once diff --git a/dbms/tests/performance/trim_numbers.xml b/dbms/tests/performance/trim_numbers.xml index 997272e95f6..62e26f8245a 100644 --- a/dbms/tests/performance/trim_numbers.xml +++ b/dbms/tests/performance/trim_numbers.xml @@ -1,12 +1,10 @@ - loop 10000 - 5000 20000 diff --git a/dbms/tests/performance/trim_urls.xml b/dbms/tests/performance/trim_urls.xml index 23dd3f77f6e..f29d878682f 100644 --- a/dbms/tests/performance/trim_urls.xml +++ b/dbms/tests/performance/trim_urls.xml @@ -1,5 +1,4 @@ - loop hits_100m_single @@ -10,7 +9,6 @@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/trim_whitespace.xml b/dbms/tests/performance/trim_whitespace.xml index 2038d8f5647..8ec4aeaa54e 100644 --- a/dbms/tests/performance/trim_whitespace.xml +++ b/dbms/tests/performance/trim_whitespace.xml @@ -1,5 +1,4 @@ - loop create table if not exists whitespaces diff --git a/dbms/tests/performance/uniq.xml b/dbms/tests/performance/uniq.xml index 9de2ecdf72b..0b7c8e58c86 100644 --- a/dbms/tests/performance/uniq.xml +++ b/dbms/tests/performance/uniq.xml @@ -1,5 +1,4 @@ - loop hits_100m_single diff --git a/dbms/tests/performance/url_hits.xml b/dbms/tests/performance/url_hits.xml index d4e504cd1b8..f9383eb3910 100644 --- a/dbms/tests/performance/url_hits.xml +++ b/dbms/tests/performance/url_hits.xml @@ -1,5 +1,4 @@ - loop hits_100m_single @@ -10,7 +9,6 @@ 10000 - 5000 20000 diff --git a/dbms/tests/performance/vectorize_aggregation_combinators.xml b/dbms/tests/performance/vectorize_aggregation_combinators.xml index 73024f454f9..49af4ae0f07 
100644 --- a/dbms/tests/performance/vectorize_aggregation_combinators.xml +++ b/dbms/tests/performance/vectorize_aggregation_combinators.xml @@ -1,13 +1,11 @@ - loop 30000 - 6000 60000 diff --git a/dbms/tests/performance/visit_param_extract_raw.xml b/dbms/tests/performance/visit_param_extract_raw.xml index 0faa43088e7..5db6b11a5e0 100644 --- a/dbms/tests/performance/visit_param_extract_raw.xml +++ b/dbms/tests/performance/visit_param_extract_raw.xml @@ -1,14 +1,10 @@ - once - - 4000 10000 - param @@ -20,5 +16,5 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(visitParamExtractRaw(materialize({param}), 'myparam')) + SELECT count() FROM numbers(2000000) WHERE NOT ignore(visitParamExtractRaw(materialize({param}), 'myparam')) diff --git a/dbms/tests/performance/website.xml b/dbms/tests/performance/website.xml index 83a1c3607c7..ef97d118b85 100644 --- a/dbms/tests/performance/website.xml +++ b/dbms/tests/performance/website.xml @@ -1,5 +1,4 @@ - loop hits_10m_single From edc9f9886c284a93477f72f5ff941b35d4ffbadf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 22:52:07 +0300 Subject: [PATCH 190/215] Remove unused features from performance test --- .../performance-test/PerformanceTest.cpp | 21 ++---- .../performance-test/PerformanceTestInfo.cpp | 17 ----- .../performance-test/PerformanceTestInfo.h | 7 -- .../performance-test/ReportBuilder.cpp | 73 +++++++------------ .../performance-test/StopConditionsSet.cpp | 3 - .../performance-test/StopConditionsSet.h | 1 - dbms/programs/performance-test/TestStats.cpp | 63 ---------------- dbms/programs/performance-test/TestStats.h | 17 ----- .../performance-test/TestStopConditions.h | 2 - .../performance-test/executeQuery.cpp | 2 - 10 files changed, 36 insertions(+), 170 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e2c5c0d8741..a2e0aa933b8 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ 
b/dbms/programs/performance-test/PerformanceTest.cpp @@ -305,22 +305,17 @@ void PerformanceTest::runQueries( statistics.startWatches(); try { - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings); - - if (test_info.exec_type == ExecutionType::Loop) + LOG_INFO(log, "Will run query in loop"); + for (size_t iteration = 0; !statistics.got_SIGINT; ++iteration) { - LOG_INFO(log, "Will run query in loop"); - for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - { - LOG_INFO(log, "Stop conditions fulfilled"); - break; - } - - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings); + LOG_INFO(log, "Stop conditions fulfilled"); + break; } + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings); } } catch (const Exception & e) diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index b0f877abfc7..ef48ffae9df 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -54,7 +54,6 @@ PerformanceTestInfo::PerformanceTestInfo( extractQueries(config); extractAuxiliaryQueries(config); processSubstitutions(config); - getExecutionType(config); getStopConditions(config); } @@ -141,22 +140,6 @@ void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config) } } -void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) -{ - if (!config->has("type")) - throw Exception("Missing type property in config: " + test_name, - ErrorCodes::BAD_ARGUMENTS); - - std::string config_exec_type = config->getString("type"); - if (config_exec_type == "loop") - exec_type = 
ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = ExecutionType::Once; - else - throw Exception("Unknown type " + config_exec_type + " in :" + test_name, - ErrorCodes::BAD_ARGUMENTS); -} - void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) { diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index 8e6b1c5f43a..d40f5d3f19f 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -12,11 +12,6 @@ namespace DB { -enum class ExecutionType -{ - Loop, - Once -}; using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; @@ -34,7 +29,6 @@ public: Strings queries; Settings settings; - ExecutionType exec_type; StringToVector substitutions; size_t times_to_run; @@ -47,7 +41,6 @@ private: void applySettings(XMLConfigurationPtr config); void extractQueries(XMLConfigurationPtr config); void processSubstitutions(XMLConfigurationPtr config); - void getExecutionType(XMLConfigurationPtr config); void getStopConditions(XMLConfigurationPtr config); void extractAuxiliaryQueries(XMLConfigurationPtr config); }; diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index c95b4d56a1e..ef4417f1713 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -17,13 +17,6 @@ namespace DB namespace { -std::string getMainMetric(const PerformanceTestInfo & test_info) -{ - if (test_info.exec_type == ExecutionType::Loop) - return "min_time"; - else - return "rows_per_second"; -} bool isASCIIString(const std::string & str) { @@ -120,50 +113,40 @@ std::string ReportBuilder::buildFullReport( runJSON.set("exception", "Some exception occurred with non ASCII message. This may produce invalid JSON. 
Try reproduce locally."); } - if (test_info.exec_type == ExecutionType::Loop) + /// in seconds + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (statistics.sampler.size() != 0) { - /// in seconds - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (statistics.sampler.size() != 0) + JSONString quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) - { - std::string quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); + std::string quantile_key = std::to_string(percent / 100.0); + while (quantile_key.back() == '0') + quantile_key.pop_back(); - quantiles.set(quantile_key, - statistics.sampler.quantileInterpolated(percent / 100.0)); - } - quantiles.set("0.95", - statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", - statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", - statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", - statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); - runJSON.set("total_time", statistics.total_time); - - if (statistics.total_time != 0) - { - runJSON.set("queries_per_second", static_cast(statistics.queries) / statistics.total_time); - runJSON.set("rows_per_second", static_cast(statistics.total_rows_read) / 
statistics.total_time); - runJSON.set("bytes_per_second", static_cast(statistics.total_bytes_read) / statistics.total_time); - } + runJSON.set("quantiles", quantiles.asString()); } - else + + runJSON.set("total_time", statistics.total_time); + + if (statistics.total_time != 0) { - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + runJSON.set("queries_per_second", static_cast(statistics.queries) / statistics.total_time); + runJSON.set("rows_per_second", static_cast(statistics.total_rows_read) / statistics.total_time); + runJSON.set("bytes_per_second", static_cast(statistics.total_bytes_read) / statistics.total_time); } runJSON.set("memory_usage", statistics.memory_usage); @@ -197,7 +180,7 @@ std::string ReportBuilder::buildCompactReport( output << "run " << std::to_string(number_of_launch + 1) << ": "; - std::string main_metric = getMainMetric(test_info); + std::string main_metric = "min_time"; output << main_metric << " = "; size_t index = number_of_launch * test_info.queries.size() + query_index; diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp index ab334b71244..9d0df07631b 100644 --- a/dbms/programs/performance-test/StopConditionsSet.cpp +++ b/dbms/programs/performance-test/StopConditionsSet.cpp @@ -28,8 +28,6 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_ min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); else if (key == "max_speed_not_changing_for_ms") max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); else throw Exception("Met unknown stop 
condition: " + key, ErrorCodes::LOGICAL_ERROR); @@ -45,7 +43,6 @@ void StopConditionsSet::reset() iterations.fulfilled = false; min_time_not_changing_for_ms.fulfilled = false; max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; fulfilled_count = 0; } diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h index ad29c748a76..db34c9e4b51 100644 --- a/dbms/programs/performance-test/StopConditionsSet.h +++ b/dbms/programs/performance-test/StopConditionsSet.h @@ -30,7 +30,6 @@ struct StopConditionsSet StopCondition iterations; StopCondition min_time_not_changing_for_ms; StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; size_t initialized_count = 0; size_t fulfilled_count = 0; diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 4a3ec281d90..5268f8bb328 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -67,41 +67,6 @@ void TestStats::update_min_time(UInt64 min_time_candidate) } } -void TestStats::update_max_speed( - size_t max_speed_candidate, - Stopwatch & max_speed_watch, - UInt64 & max_speed) -{ - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } -} - - -void TestStats::update_average_speed( - double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) -{ - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; - - if (avg_speed_first == 0) - avg_speed_first = avg_speed_value; - - auto [min, max] = std::minmax(avg_speed_value, avg_speed_first); - if (1 - min / max >= precision) - { - avg_speed_first = avg_speed_value; - 
avg_speed_watch.restart(); - } -} void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) { @@ -109,26 +74,6 @@ void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) total_bytes_read += bytes_read_inc; last_query_rows_read += rows_read_inc; last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); } void TestStats::updateQueryInfo() @@ -144,10 +89,6 @@ TestStats::TestStats() watch.reset(); watch_per_query.reset(); min_time_watch.reset(); - max_rows_speed_watch.reset(); - max_bytes_speed_watch.reset(); - avg_rows_speed_watch.reset(); - avg_bytes_speed_watch.reset(); } @@ -156,10 +97,6 @@ void TestStats::startWatches() watch.start(); watch_per_query.start(); min_time_watch.start(); - max_rows_speed_watch.start(); - max_bytes_speed_watch.start(); - avg_rows_speed_watch.start(); - avg_bytes_speed_watch.start(); } } diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index b38ffa7386a..c88e50727c4 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -13,10 +13,6 @@ struct TestStats Stopwatch watch; Stopwatch watch_per_query; Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch 
avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; bool last_query_was_cancelled = false; std::string query_id; @@ -62,19 +58,6 @@ struct TestStats void update_min_time(UInt64 min_time_candidate); - void update_average_speed( - double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value); - - void update_max_speed( - size_t max_speed_candidate, - Stopwatch & max_speed_watch, - UInt64 & max_speed); - void add(size_t rows_read_inc, size_t bytes_read_inc); void updateQueryInfo(); diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h index 2dcbcce4674..0bdfa094641 100644 --- a/dbms/programs/performance-test/TestStopConditions.h +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -32,8 +32,6 @@ public: DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) DEFINE_REPORT_FUNC(reportIterations, iterations) DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) #undef REPORT diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp index db82a48d0c1..6f0f4cbe3c5 100644 --- a/dbms/programs/performance-test/executeQuery.cpp +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -21,8 +21,6 @@ void checkFulfilledConditionsAndUpdate( stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - 
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); if (stop_conditions.areFulfilled()) { From 9a10457cb6bcec6f8c11ecfb3e216fb99df78038 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Mar 2020 22:56:38 +0300 Subject: [PATCH 191/215] Remove `indexHint` function (#9542) * Remove indexHint function * Fixed build * Update KeyCondition.cpp * Update KeyCondition.cpp --- dbms/src/Functions/indexHint.cpp | 63 ----------- .../registerFunctionsMiscellaneous.cpp | 2 - dbms/src/Interpreters/ActionsVisitor.cpp | 10 -- .../RequiredSourceColumnsVisitor.cpp | 3 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 12 +- .../Storages/MergeTree/MergeTreeIndexSet.cpp | 4 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 4 - dbms/src/Storages/MergeTree/RPNBuilder.h | 5 +- .../functions/other_functions.md | 105 +----------------- .../functions/other_functions.md | 105 ------------------ .../functions/other_functions.md | 96 ---------------- 11 files changed, 9 insertions(+), 400 deletions(-) delete mode 100644 dbms/src/Functions/indexHint.cpp diff --git a/dbms/src/Functions/indexHint.cpp b/dbms/src/Functions/indexHint.cpp deleted file mode 100644 index 0da2398ebd9..00000000000 --- a/dbms/src/Functions/indexHint.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include - - -namespace DB -{ - - -/** The `indexHint` function takes any number of any arguments and always returns one. - * - * This function has a special meaning (see ExpressionAnalyzer, KeyCondition) - * - the expressions inside it are not evaluated; - * - but when analyzing the index (selecting ranges for reading), this function is treated the same way, - * as if instead of using it the expression itself would be. - * - * Example: WHERE something AND indexHint(CounterID = 34) - * - do not read or calculate CounterID = 34, but select ranges in which the CounterID = 34 expression can be true. 
- * - * The function can be used for debugging purposes, as well as for (hidden from the user) query conversions. - */ -class FunctionIndexHint : public IFunction -{ -public: - static constexpr auto name = "indexHint"; - static FunctionPtr create(const Context &) - { - return std::make_shared(); - } - - bool isVariadic() const override - { - return true; - } - size_t getNumberOfArguments() const override - { - return 0; - } - - bool useDefaultImplementationForNulls() const override { return false; } - - String getName() const override - { - return name; - } - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override - { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 1u); - } -}; - - -void registerFunctionIndexHint(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index 6b8b030d2c3..44e26542c7d 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -28,7 +28,6 @@ void registerFunctionSleepEachRow(FunctionFactory &); void registerFunctionMaterialize(FunctionFactory &); void registerFunctionIgnore(FunctionFactory &); void registerFunctionIgnoreExceptNull(FunctionFactory &); -void registerFunctionIndexHint(FunctionFactory &); void registerFunctionIdentity(FunctionFactory &); void registerFunctionArrayJoin(FunctionFactory &); void registerFunctionReplicate(FunctionFactory &); @@ -87,7 +86,6 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionMaterialize(factory); registerFunctionIgnore(factory); registerFunctionIgnoreExceptNull(factory); - registerFunctionIndexHint(factory); registerFunctionIdentity(factory); 
registerFunctionArrayJoin(factory); registerFunctionReplicate(factory); diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index ea13bb57b14..3a20ae6ce24 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -400,16 +400,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (node.name == "indexHint") - { - data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( - ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), - column_name.get(ast)))); - return; - } - if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name)) return; diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp index b5f9c83db50..5a740805560 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -51,9 +51,8 @@ bool RequiredSourceColumnsMatcher::needChildVisit(const ASTPtr & node, const AST if (const auto * f = node->as()) { - /// "indexHint" is a special function for index analysis. Everything that is inside it is not calculated. @sa KeyCondition /// "lambda" visit children itself. 
- if (f->name == "indexHint" || f->name == "lambda") + if (f->name == "lambda") return false; } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 0bd2389e941..a936ead568d 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -281,11 +281,11 @@ static const std::map inverse_relations = { bool isLogicalOperator(const String & func_name) { - return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint"); + return (func_name == "and" || func_name == "or" || func_name == "not"); } /// The node can be one of: -/// - Logical operator (AND, OR, NOT and indexHint() - logical NOOP) +/// - Logical operator (AND, OR, NOT) /// - An "atom" (relational operator, constant, expression) /// - A logical constant expression /// - Any other function @@ -302,8 +302,7 @@ ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversio const auto result_node = makeASTFunction(func->name); - /// indexHint() is a special case - logical NOOP function - if (result_node->name != "indexHint" && need_inversion) + if (need_inversion) { result_node->name = (result_node->name == "and") ? "or" : "and"; } @@ -887,9 +886,6 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out) { /// Functions AND, OR, NOT. - /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses - * (or, the same thing - calling the function `and` from one argument). 
- */ const ASTs & args = func->arguments->children; if (func->name == "not") @@ -901,7 +897,7 @@ bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNE } else { - if (func->name == "and" || func->name == "indexHint") + if (func->name == "and") out.function = RPNElement::FUNCTION_AND; else if (func->name == "or") out.function = RPNElement::FUNCTION_OR; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index a379463210b..946b4d80c99 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -363,7 +363,7 @@ bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const func->name = "__bitSwapLastTwo"; } - else if (func->name == "and" || func->name == "indexHint") + else if (func->name == "and") { auto last_arg = args.back(); args.pop_back(); @@ -419,7 +419,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi const ASTs & args = func->arguments->children; - if (func->name == "and" || func->name == "indexHint") + if (func->name == "and") return checkASTUseless(args[0], atomic) && checkASTUseless(args[1], atomic); else if (func->name == "or") return checkASTUseless(args[0], atomic) || checkASTUseless(args[1], atomic); diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 311a09f3461..e18c9197217 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -332,10 +332,6 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const if ("globalIn" == function_ptr->name || "globalNotIn" == function_ptr->name) return true; - - /// indexHint is a special function that it does not make sense to transfer to PREWHERE - if ("indexHint" == function_ptr->name) - return true; } else if (auto opt_name = 
IdentifierSemantic::getColumnName(ptr)) { diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h index 234f7b06ffe..2e457147cf4 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.h +++ b/dbms/src/Storages/MergeTree/RPNBuilder.h @@ -91,9 +91,6 @@ private: bool operatorFromAST(const ASTFunction * func, RPNElement & out) { /// Functions AND, OR, NOT. - /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses - * (or, the same thing - calling the function `and` from one argument). - */ const ASTs & args = typeid_cast(*func->arguments).children; if (func->name == "not") @@ -105,7 +102,7 @@ private: } else { - if (func->name == "and" || func->name == "indexHint") + if (func->name == "and") out.function = RPNElement::FUNCTION_AND; else if (func->name == "or") out.function = RPNElement::FUNCTION_OR; diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index b3066f88685..e851cf804d8 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -734,109 +734,6 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) └───────────────────────────────────────────────────────┘ ``` -## indexHint {#indexhint} - -The function is intended for debugging and introspection purposes. The function ignores it's argument and always returns 1. Arguments are not even evaluated. - -But for the purpose of index analysis, the argument of this function is analyzed as if it was present directly without being wrapped inside `indexHint` function. This allows to select data in index ranges by the corresponding condition but without further filtering by this condition. The index in ClickHouse is sparse and using `indexHint` will yield more data than specifying the same condition directly. 
- -**Syntax** - -```sql -SELECT * FROM table WHERE indexHint() -``` - -**Returned value** - -1. Type: [Uint8](https://clickhouse.yandex/docs/en/data_types/int_uint/#diapazony-uint). - -**Example** - -Here is the example of test data from the table [ontime](../../getting_started/example_datasets/ontime.md). - -Input table: - -```sql -SELECT count() FROM ontime -``` - -```text -┌─count()─┐ -│ 4276457 │ -└─────────┘ -``` - -The table has indexes on the fields `(FlightDate, (Year, FlightDate))`. - -Create a query, where the index is not used. - -Query: - -```sql -SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k -``` - -ClickHouse processed the entire table (`Processed 4.28 million rows`). - -Result: - -```text -┌──────────k─┬─count()─┐ -│ 2017-01-01 │ 13970 │ -│ 2017-01-02 │ 15882 │ -........................ -│ 2017-09-28 │ 16411 │ -│ 2017-09-29 │ 16384 │ -│ 2017-09-30 │ 12520 │ -└────────────┴─────────┘ -``` - -To apply the index, select a specific date. - -Query: - -```sql -SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k -``` - -By using the index, ClickHouse processed a significantly smaller number of rows (`Processed 32.74 thousand rows`). - -Result: - -```text -┌──────────k─┬─count()─┐ -│ 2017-09-15 │ 16428 │ -└────────────┴─────────┘ -``` - -Now wrap the expression `k = '2017-09-15'` into `indexHint` function. - -Query: - -```sql -SELECT - FlightDate AS k, - count() -FROM ontime -WHERE indexHint(k = '2017-09-15') -GROUP BY k -ORDER BY k ASC -``` - -ClickHouse used the index in the same way as the previous time (`Processed 32.74 thousand rows`). -The expression `k = '2017-09-15'` was not used when generating the result. -In examle the `indexHint` function allows to see adjacent dates. 
- -Result: - -```text -┌──────────k─┬─count()─┐ -│ 2017-09-14 │ 7071 │ -│ 2017-09-15 │ 16428 │ -│ 2017-09-16 │ 1077 │ -│ 2017-09-30 │ 8167 │ -└────────────┴─────────┘ -``` ## replicate {#other_functions-replicate} @@ -1005,7 +902,7 @@ joinGet(join_storage_table_name, `value_column`, join_keys) Returns list of values corresponded to list of keys. -If certain doesn't exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. +If certain doesn't exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. More info about `join_use_nulls` in [Join operation](../../operations/table_engines/join.md). diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index a988c14b56d..e85eaac6f99 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -685,111 +685,6 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) └───────────────────────────────────────────────────────┘ ``` -## indexHint {#indexhint} - -Возвращает все данные из диапазона, в который попадают данные, соответствующие указанному выражению. -Переданное выражение не будет вычислено. Выбор диапазона производится по индексу. -Индекс в ClickHouse разреженный, при чтении диапазона в ответ попадают «лишние» соседние данные. - -**Синтаксис** - -```sql -SELECT * FROM table WHERE indexHint() -``` - -**Возвращаемое значение** - -Возвращает диапазон индекса, в котором выполняется заданное условие. - -Тип: [Uint8](https://clickhouse.yandex/docs/ru/data_types/int_uint/#diapazony-uint). - -**Пример** - -Рассмотрим пример с использованием тестовых данных таблицы [ontime](../../getting_started/example_datasets/ontime.md). 
- -Исходная таблица: - -```sql -SELECT count() FROM ontime -``` - -```text -┌─count()─┐ -│ 4276457 │ -└─────────┘ -``` - -В таблице есть индексы по полям `(FlightDate, (Year, FlightDate))`. - -Выполним выборку по дате, где индекс не используется. - -Запрос: - -```sql -SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k -``` - -ClickHouse обработал всю таблицу (`Processed 4.28 million rows`). - -Результат: - -```text -┌──────────k─┬─count()─┐ -│ 2017-01-01 │ 13970 │ -│ 2017-01-02 │ 15882 │ -........................ -│ 2017-09-28 │ 16411 │ -│ 2017-09-29 │ 16384 │ -│ 2017-09-30 │ 12520 │ -└────────────┴─────────┘ -``` - -Для подключения индекса выбираем конкретную дату. - -Запрос: - -```sql -SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k -``` - -При использовании индекса ClickHouse обработал значительно меньшее количество строк (`Processed 32.74 thousand rows`). - -Результат: - -```text -┌──────────k─┬─count()─┐ -│ 2017-09-15 │ 16428 │ -└────────────┴─────────┘ -``` - -Передадим в функцию `indexHint` выражение `k = '2017-09-15'`. - -Запрос: - -```sql -SELECT - FlightDate AS k, - count() -FROM ontime -WHERE indexHint(k = '2017-09-15') -GROUP BY k -ORDER BY k ASC -``` - -ClickHouse применил индекс по аналогии с примером выше (`Processed 32.74 thousand rows`). -Выражение `k = '2017-09-15'` не используется при формировании результата. -Функция `indexHint` позволяет увидеть соседние данные. 
- -Результат: - -```text -┌──────────k─┬─count()─┐ -│ 2017-09-14 │ 7071 │ -│ 2017-09-15 │ 16428 │ -│ 2017-09-16 │ 1077 │ -│ 2017-09-30 │ 8167 │ -└────────────┴─────────┘ -``` ## replicate {#other_functions-replicate} diff --git a/docs/zh/query_language/functions/other_functions.md b/docs/zh/query_language/functions/other_functions.md index a93079f4af3..613a13e48be 100644 --- a/docs/zh/query_language/functions/other_functions.md +++ b/docs/zh/query_language/functions/other_functions.md @@ -503,102 +503,6 @@ SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) 1 rows in set. Elapsed: 0.002 sec. ``` -## indexHint - -输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。 - -传递给函数的表达式参数将不会被计算,但ClickHouse使用参数中的表达式进行索引过滤。 - -**返回值** - -- 1。 - -**示例** - -这是一个包含[ontime](../../getting_started/example_datasets/ontime.md)测试数据集的测试表。 - -``` -SELECT count() FROM ontime - -┌─count()─┐ -│ 4276457 │ -└─────────┘ -``` - -该表使用`(FlightDate, (Year, FlightDate))`作为索引。 - -对该表进行如下的查询: - -``` -:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k - -SELECT - FlightDate AS k, - count() -FROM ontime -GROUP BY k -ORDER BY k ASC - -┌──────────k─┬─count()─┐ -│ 2017-01-01 │ 13970 │ -│ 2017-01-02 │ 15882 │ -........................ -│ 2017-09-28 │ 16411 │ -│ 2017-09-29 │ 16384 │ -│ 2017-09-30 │ 12520 │ -└────────────┴─────────┘ - -273 rows in set. Elapsed: 0.072 sec. Processed 4.28 million rows, 8.55 MB (59.00 million rows/s., 118.01 MB/s.) -``` - -在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询: - -``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k - -SELECT - FlightDate AS k, - count() -FROM ontime -WHERE k = '2017-09-15' -GROUP BY k -ORDER BY k ASC - -┌──────────k─┬─count()─┐ -│ 2017-09-15 │ 16428 │ -└────────────┴─────────┘ - -1 rows in set. Elapsed: 0.014 sec. Processed 32.74 thousand rows, 65.49 KB (2.31 million rows/s., 4.63 MB/s.) 
-``` - -在最后一行的显示中,通过索引ClickHouse处理的行数明显减少(`Processed 32.74 thousand rows`)。 - -现在将表达式`k = '2017-09-15'`传递给`indexHint`函数: - -``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k - -SELECT - FlightDate AS k, - count() -FROM ontime -WHERE indexHint(k = '2017-09-15') -GROUP BY k -ORDER BY k ASC - -┌──────────k─┬─count()─┐ -│ 2017-09-14 │ 7071 │ -│ 2017-09-15 │ 16428 │ -│ 2017-09-16 │ 1077 │ -│ 2017-09-30 │ 8167 │ -└────────────┴─────────┘ - -4 rows in set. Elapsed: 0.004 sec. Processed 32.74 thousand rows, 65.49 KB (8.97 million rows/s., 17.94 MB/s.) -``` - -对于这个请求,根据ClickHouse显示ClickHouse与上一次相同的方式应用了索引(`Processed 32.74 thousand rows`)。但是,最终返回的结果集中并没有根据`k = '2017-09-15'`表达式进行过滤结果。 - -由于ClickHouse中使用稀疏索引,因此在读取范围时(本示例中为相邻日期),“额外”的数据将包含在索引结果中。使用`indexHint`函数可以查看到它们。 ## replicate From 1991464cb6d4e5bfde773dcee8dc88bf23fa6330 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Mar 2020 22:56:58 +0300 Subject: [PATCH 192/215] Remove `findClusterIndex`, `findClusterValue` functions (#9543) * Remove findCluster* functions * Fixed test * Fixed test --- dbms/src/Functions/FunctionsFindCluster.cpp | 14 - dbms/src/Functions/FunctionsFindCluster.h | 302 ------------------ dbms/src/Functions/registerFunctions.cpp | 2 - .../0_stateless/00809_add_days_segfault.sql | 2 +- 4 files changed, 1 insertion(+), 319 deletions(-) delete mode 100644 dbms/src/Functions/FunctionsFindCluster.cpp delete mode 100644 dbms/src/Functions/FunctionsFindCluster.h diff --git a/dbms/src/Functions/FunctionsFindCluster.cpp b/dbms/src/Functions/FunctionsFindCluster.cpp deleted file mode 100644 index 4f7caf8d536..00000000000 --- a/dbms/src/Functions/FunctionsFindCluster.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include - - -namespace DB -{ - -void registerFunctionsFindCluster(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerFunction(); -} - -} diff --git a/dbms/src/Functions/FunctionsFindCluster.h 
b/dbms/src/Functions/FunctionsFindCluster.h deleted file mode 100644 index 26eb6564020..00000000000 --- a/dbms/src/Functions/FunctionsFindCluster.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; -} - -enum ClusterOperation -{ - FindClusterIndex = 0, - FindCentroidValue = 1 -}; - -/// The centroid values are converted to Float64 for easier coding of -/// distance calculations. -/// -/// We assume to have 10th to 100th centroids, usually of type Float64, as a typical use case. -/// While it is possible to sort centroids and use a modification of a binary search to find the -/// nearest centroid, we think for arrays of 10th to 100th this might be an overkill. -/// -/// Also, even though centroids of other types are feasible, this first implementation -/// lacks support of them for simplicity. Date, DateTime and Strings (eg. with the -/// Levenshtein distance) could be theoretically supported, as well as custom distance -/// functions (eg. Hamming distance) using Clickhouse lambdas. - -// Centroids array has the same size as number of clusters. -inline size_t find_centroid(Float64 x, std::vector & centroids) -{ - // Centroids array has to have at least one element, and if it has only one element, - // it is also the result of this Function. 
- Float64 distance = std::abs(centroids[0] - x); - size_t index = 0; - - // Check if we have more clusters and if we have, whether some is closer to src[i] - for (size_t j = 1; j < centroids.size(); ++j) - { - Float64 next_distance = std::abs(centroids[j] - x); - - if (next_distance < distance) - { - distance = next_distance; - index = j; - } - } - - // Index of the closest cluster, or 0 in case of just one cluster - return index; -} - -/** findClusterIndex(x, centroids_array) - find index of element in centroids_array with the value nearest to x - * findClusterValue(x, centroids_array) - find value of element in centroids_array with the value nearest to x - * - * Types: - * findClusterIndex(T, Array(T)) -> UInt64 - * findClusterValue(T, Array(T)) -> T - * - * T can be any numeric type. - * centroids_array must be constant - */ -class FunctionFindClusterIndex : public IFunction -{ -public: - static constexpr auto name = "findClusterIndex"; - static FunctionPtr create(const Context &) - { - return std::make_shared(); - } - - String getName() const override - { - return FunctionFindClusterIndex::name; - } - - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto args_size = arguments.size(); - if (args_size != 2) - throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + toString(args_size) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - - const auto type_x = arguments[0]; - - if (!isNativeNumber(type_x)) - throw Exception{"Unsupported type " + type_x->getName() + " of first argument of function " + getName() + " must be a numeric type", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - 
const DataTypeArray * type_arr_from = checkAndGetDataType(arguments[1].get()); - - if (!type_arr_from) - throw Exception{"Second argument of function " + getName() + " must be literal array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - return std::make_shared(); - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const auto in_untyped = block.getByPosition(arguments[0]).column.get(); - const auto centroids_array_untyped = block.getByPosition(arguments[1]).column.get(); - auto column_result = block.getByPosition(result).type->createColumn(); - auto out_untyped = column_result.get(); - - if (!isColumnConst(*centroids_array_untyped)) - throw Exception{"Second argument of function " + getName() + " must be literal array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - executeImplTyped(in_untyped, out_untyped, centroids_array_untyped); - - block.getByPosition(result).column = std::move(column_result); - } - -protected: - virtual ClusterOperation getOperation() - { - return ClusterOperation::FindClusterIndex; - } - - virtual void executeImplTyped(const IColumn* in_untyped, IColumn* out_untyped, const IColumn* centroids_array_untyped) - { - if (!executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped)) - { - throw Exception{"Function " + 
getName() + " expects both x and centroids_array of a numeric type." - " Passed arguments are " + in_untyped->getName() + " and " + centroids_array_untyped->getName(), ErrorCodes::ILLEGAL_COLUMN}; - - } - } - - // Match the type of the centrods array and convert them to Float64, because we - // don't want to have problems calculating negative distances of UInts - template - bool fillCentroids(const IColumn * centroids_array_untyped, std::vector & centroids) - { - const ColumnConst * const_centroids_array = checkAndGetColumnConst>(centroids_array_untyped); - - if (!const_centroids_array) - return false; - - Array array = const_centroids_array->getValue(); - if (array.empty()) - throw Exception{"Centroids array must be not empty", ErrorCodes::ILLEGAL_COLUMN}; - - for (size_t k = 0; k < array.size(); ++k) - { - const Field & tmp_field = array[k]; - NearestFieldType value; - if (!tmp_field.tryGet(value)) - return false; - - centroids.push_back(Float64(value)); - } - return true; - } - - template - bool executeOperation(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * centroids_array_untyped) - { - // Match the type of the output - auto out = typeid_cast *>(out_untyped); - - if (!out) - return false; - - PaddedPODArray & dst = out->getData(); - - // try to match the type of the input column - if (!executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, dst, centroids_array_untyped) - && !executeOperationTyped(in_untyped, 
dst, centroids_array_untyped)) - { - return false; - } - - return true; - } - - template - bool executeOperationTyped(const IColumn * in_untyped, PaddedPODArray & dst, const IColumn * centroids_array_untyped) - { - const auto maybe_const = in_untyped->convertToFullColumnIfConst(); - in_untyped = maybe_const.get(); - - const auto in_vector = checkAndGetColumn>(in_untyped); - if (in_vector) - { - const PaddedPODArray & src = in_vector->getData(); - - std::vector centroids; - if (!fillCentroids(centroids_array_untyped, centroids)) - return false; - - for (size_t i = 0; i < src.size(); ++i) - { - size_t index = find_centroid(Float64(src[i]), centroids); - if (getOperation() == ClusterOperation::FindClusterIndex) - // Note that array indexes start with 1 in Clickhouse - dst.push_back(UInt64(index + 1)); - else if (getOperation() == ClusterOperation::FindCentroidValue) - dst.push_back(centroids[index]); - else - throw Exception{"Unexpected error in findCluster* function", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - } - - return true; - } - return false; - } - -}; - -class FunctionFindClusterValue : public FunctionFindClusterIndex -{ -public: - static constexpr auto name = "findClusterValue"; - static FunctionPtr create(const Context &) - { - return std::make_shared(); - } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - FunctionFindClusterIndex::getReturnTypeImpl(arguments); - const DataTypeArray * type_arr_from = checkAndGetDataType(arguments[1].get()); - return type_arr_from->getNestedType(); - } - - String getName() const override - { - return FunctionFindClusterValue::name; - } - -protected: - ClusterOperation getOperation() override - { - return ClusterOperation::FindCentroidValue; - } - - void executeImplTyped(const IColumn* in_untyped, IColumn* out_untyped, const IColumn* centroids_array_untyped) override - { - if (!executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, 
centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped) - && !executeOperation(in_untyped, out_untyped, centroids_array_untyped)) - { - throw Exception{"Function " + getName() + " expects both x and centroids_array of a numeric type." - "Passed arguments are " + in_untyped->getName() + " and " + centroids_array_untyped->getName(), ErrorCodes::ILLEGAL_COLUMN}; - } - } -}; - -} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 652e9a8b8af..233018c7f16 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -35,7 +35,6 @@ void registerFunctionsMath(FunctionFactory &); void registerFunctionsGeo(FunctionFactory &); void registerFunctionsIntrospection(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); -void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionsConsistentHashing(FunctionFactory & factory); @@ -74,7 +73,6 @@ void registerFunctions() registerFunctionsMath(factory); registerFunctionsGeo(factory); registerFunctionsNull(factory); - registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); registerFunctionsIntrospection(factory); registerFunctionsConsistentHashing(factory); diff --git a/dbms/tests/queries/0_stateless/00809_add_days_segfault.sql b/dbms/tests/queries/0_stateless/00809_add_days_segfault.sql index b087f7bbde5..3d2e11ece77 100644 --- 
a/dbms/tests/queries/0_stateless/00809_add_days_segfault.sql +++ b/dbms/tests/queries/0_stateless/00809_add_days_segfault.sql @@ -8,5 +8,5 @@ SET send_logs_level = 'none'; SELECT ignore(addDays((CAST((96.338) AS DateTime)), -3)); SELECT ignore(subtractDays((CAST((-5263074.47) AS DateTime)), -737895)); -SELECT quantileDeterministic([], findClusterIndex(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', []; -- { serverError 42 } +SELECT quantileDeterministic([], identity(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', []; -- { serverError 43 } SELECT sequenceCount((CAST((( SELECT NULL ) AS rg, ( SELECT ( SELECT [], ' Date: Sat, 7 Mar 2020 23:35:55 +0300 Subject: [PATCH 193/215] Support for NULL as random seed; reordered parameters for convenience --- dbms/src/Storages/StorageGenerateRandom.cpp | 26 +++++++++---------- dbms/src/Storages/StorageGenerateRandom.h | 3 ++- .../TableFunctionGenerateRandom.cpp | 16 ++++++------ docs/en/operations/table_engines/generate.md | 4 +-- .../table_functions/generate.md | 6 ++--- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index 6f98dc1746c..e2e04b9a638 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -352,10 +352,10 @@ private: StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_, - UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_) + UInt64 max_array_length_, UInt64 max_string_length_, std::optional random_seed_) : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { - random_seed = random_seed_ ? random_seed_ : randomSeed(); + random_seed = random_seed_ ? 
*random_seed_ : randomSeed(); setColumns(columns_); } @@ -367,25 +367,25 @@ void registerStorageGenerateRandom(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() > 3) - throw Exception("Storage GenerateRandom requires at most three arguments: "\ - "max_array_length, max_string_length, random_seed.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception("Storage GenerateRandom requires at most three arguments: " + "random_seed, max_string_length, max_array_length.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - UInt64 max_array_length_ = 10; - UInt64 max_string_length_ = 10; - UInt64 random_seed_ = 0; // zero for random + std::optional random_seed; + UInt64 max_string_length = 10; + UInt64 max_array_length = 10; - /// Parsing second argument if present if (engine_args.size() >= 1) - max_array_length_ = engine_args[0]->as().value.safeGet(); + random_seed = engine_args[2]->as().value.safeGet(); if (engine_args.size() >= 2) - max_string_length_ = engine_args[1]->as().value.safeGet(); + max_string_length = engine_args[0]->as().value.safeGet(); if (engine_args.size() == 3) - random_seed_ = engine_args[2]->as().value.safeGet(); + max_array_length = engine_args[1]->as().value.safeGet(); - return StorageGenerateRandom::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_); + + return StorageGenerateRandom::create(args.table_id, args.columns, max_array_length, max_string_length, random_seed); }); } diff --git a/dbms/src/Storages/StorageGenerateRandom.h b/dbms/src/Storages/StorageGenerateRandom.h index 7622099dcbb..f39ca1f18c1 100644 --- a/dbms/src/Storages/StorageGenerateRandom.h +++ b/dbms/src/Storages/StorageGenerateRandom.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -29,7 +30,7 @@ private: protected: StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_, - UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed); + 
UInt64 max_array_length, UInt64 max_string_length, std::optional random_seed); }; } diff --git a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp index 327e941508a..df615729cb8 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -36,30 +36,30 @@ StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, if (args.size() < 1) throw Exception("Table function '" + getName() + "' requires at least one argument: " - " structure(, max_array_length, max_string_length, random_seed).", + " structure, [random_seed, max_string_length, max_array_length].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (args.size() > 4) throw Exception("Table function '" + getName() + "' requires at most four arguments: " - " structure, max_array_length, max_string_length, random_seed.", + " structure, [random_seed, max_string_length, max_array_length].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// Parsing first argument as table structure and creating a sample block std::string structure = args[0]->as().value.safeGet(); - UInt64 max_array_length = 10; UInt64 max_string_length = 10; - UInt64 random_seed = 0; // zero for random + UInt64 max_array_length = 10; + std::optional random_seed = 0; // zero for random - /// Parsing second argument if present if (args.size() >= 2) - max_array_length = args[1]->as().value.safeGet(); + random_seed = args[3]->as().value.safeGet(); if (args.size() >= 3) - max_string_length = args[2]->as().value.safeGet(); + max_string_length = args[1]->as().value.safeGet(); if (args.size() == 4) - random_seed = args[3]->as().value.safeGet(); + max_array_length = args[2]->as().value.safeGet(); + ColumnsDescription columns = parseColumnsListFromString(structure, context); diff --git a/docs/en/operations/table_engines/generate.md b/docs/en/operations/table_engines/generate.md index 
bdf52f84ac1..fd98b3c9d18 100644 --- a/docs/en/operations/table_engines/generate.md +++ b/docs/en/operations/table_engines/generate.md @@ -10,7 +10,7 @@ Usage examples: ## Usage in ClickHouse Server ```sql -ENGINE = GenerateRandom(max_array_length, max_string_length, random_seed) +ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) ``` The `max_array_length` and `max_string_length` parameters specify maximum length of all @@ -25,7 +25,7 @@ It supports all [DataTypes](../../data_types/index.md) that can be stored in a t **1.** Set up the `generate_engine_table` table: ```sql -CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE=GenerateRandom(3, 5, 1) +CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3) ``` **2.** Query the data: diff --git a/docs/en/query_language/table_functions/generate.md b/docs/en/query_language/table_functions/generate.md index 2f43bf453db..cca435557ac 100644 --- a/docs/en/query_language/table_functions/generate.md +++ b/docs/en/query_language/table_functions/generate.md @@ -5,7 +5,7 @@ Allows to populate test tables with data. Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`. ```sql -generateRandom('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length'[, 'max_string_length'[, 'random_seed']]]); +generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]); ``` **Parameters** @@ -15,7 +15,7 @@ generateRandom('name TypeName[, name TypeName]...', 'limit'[, 'max_array_length' - `limit` — Number of rows to generate. - `max_array_length` — Maximum array length for all generated arrays. Defaults to `10`. - `max_string_length` — Maximum string length for all generated strings. Defaults to `10`. -- `random_seed` — Specify random seed manually to produce stable results. Defaults to `0` — seed is randomly generated. 
+- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated. **Returned Value** @@ -25,7 +25,7 @@ A table object with requested schema. ```sql -SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 3, 2, 10, 1); +SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 1, 10, 2); ``` ```text ┌─a────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ From 609aef269d9d18a6150c0dc1c8f27c3f03dd6fa8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Mar 2020 23:54:27 +0300 Subject: [PATCH 194/215] Support for NULL as random seed; reordered parameters for convenience --- dbms/src/Storages/StorageGenerateRandom.cpp | 6 +++--- dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index e2e04b9a638..9efc832bf81 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -376,13 +376,13 @@ void registerStorageGenerateRandom(StorageFactory & factory) UInt64 max_array_length = 10; if (engine_args.size() >= 1) - random_seed = engine_args[2]->as().value.safeGet(); + random_seed = engine_args[0]->as().value.safeGet(); if (engine_args.size() >= 2) - max_string_length = engine_args[0]->as().value.safeGet(); + max_string_length = engine_args[1]->as().value.safeGet(); if (engine_args.size() == 3) - max_array_length = engine_args[1]->as().value.safeGet(); + max_array_length = engine_args[2]->as().value.safeGet(); return StorageGenerateRandom::create(args.table_id, args.columns, max_array_length, max_string_length, random_seed); diff --git a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp index df615729cb8..6431ca3bd54 100644 --- 
a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -52,13 +52,13 @@ StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, std::optional random_seed = 0; // zero for random if (args.size() >= 2) - random_seed = args[3]->as().value.safeGet(); + random_seed = args[1]->as().value.safeGet(); if (args.size() >= 3) - max_string_length = args[1]->as().value.safeGet(); + max_string_length = args[2]->as().value.safeGet(); if (args.size() == 4) - max_array_length = args[2]->as().value.safeGet(); + max_array_length = args[3]->as().value.safeGet(); ColumnsDescription columns = parseColumnsListFromString(structure, context); From 757c37c03ee7cdd2cacf133f0475fca19330bb1f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 00:09:45 +0300 Subject: [PATCH 195/215] Support for NULL as random seed; reordered parameters for convenience --- dbms/src/Storages/StorageGenerateRandom.cpp | 10 +++++++--- .../TableFunctions/TableFunctionGenerateRandom.cpp | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index 9efc832bf81..7584b417a93 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -376,13 +376,17 @@ void registerStorageGenerateRandom(StorageFactory & factory) UInt64 max_array_length = 10; if (engine_args.size() >= 1) - random_seed = engine_args[0]->as().value.safeGet(); + { + const Field & value = engine_args[0]->as().value; + if (!value.isNull()) + random_seed = value.safeGet(); + } if (engine_args.size() >= 2) - max_string_length = engine_args[1]->as().value.safeGet(); + max_string_length = engine_args[1]->as().value.safeGet(); if (engine_args.size() == 3) - max_array_length = engine_args[2]->as().value.safeGet(); + max_array_length = engine_args[2]->as().value.safeGet(); return 
StorageGenerateRandom::create(args.table_id, args.columns, max_array_length, max_string_length, random_seed); diff --git a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp index 6431ca3bd54..16c957aecbe 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -45,20 +45,24 @@ StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// Parsing first argument as table structure and creating a sample block - std::string structure = args[0]->as().value.safeGet(); + std::string structure = args[0]->as().value.safeGet(); UInt64 max_string_length = 10; UInt64 max_array_length = 10; std::optional random_seed = 0; // zero for random if (args.size() >= 2) - random_seed = args[1]->as().value.safeGet(); + { + const Field & value = args[1]->as().value; + if (!value.isNull()) + random_seed = value.safeGet(); + } if (args.size() >= 3) - max_string_length = args[2]->as().value.safeGet(); + max_string_length = args[2]->as().value.safeGet(); if (args.size() == 4) - max_array_length = args[3]->as().value.safeGet(); + max_array_length = args[3]->as().value.safeGet(); ColumnsDescription columns = parseColumnsListFromString(structure, context); From 4a5d61f43f099e55fdd83d81f08cc63fdfc62b77 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 01:07:09 +0300 Subject: [PATCH 196/215] Fixed errors --- dbms/src/Storages/StorageGenerateRandom.cpp | 10 +++++----- .../src/TableFunctions/TableFunctionGenerateRandom.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index 7584b417a93..ee0a0b23e62 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -41,7 +41,7 @@ namespace ErrorCodes namespace { -void 
fillBufferWithRandomData(char * __restrict data, size_t size, pcg64_fast & rng) +void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64 & rng) { char * __restrict end = data + size; while (data < end) @@ -55,7 +55,7 @@ void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64_fast & ColumnPtr fillColumnWithRandomData( - const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64_fast & rng, const Context & context) + const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64 & rng, const Context & context) { TypeIndex idx = type->getTypeId(); @@ -343,7 +343,7 @@ private: UInt64 max_string_length; Block block_header; - pcg64_fast rng; + pcg64 rng; const Context & context; }; @@ -355,7 +355,7 @@ StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const UInt64 max_array_length_, UInt64 max_string_length_, std::optional random_seed_) : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { - random_seed = random_seed_ ? *random_seed_ : randomSeed(); + random_seed = random_seed_ ? sipHash64(*random_seed_) : randomSeed(); setColumns(columns_); } @@ -416,7 +416,7 @@ Pipes StorageGenerateRandom::read( } /// Will create more seed values for each source from initial seed. 
- pcg64_fast generate(random_seed); + pcg64 generate(random_seed); for (UInt64 i = 0; i < num_streams; ++i) pipes.emplace_back(std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context)); diff --git a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp index 16c957aecbe..7b5177b0090 100644 --- a/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/dbms/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -49,7 +49,7 @@ StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, UInt64 max_string_length = 10; UInt64 max_array_length = 10; - std::optional random_seed = 0; // zero for random + std::optional random_seed; if (args.size() >= 2) { From 82eb71fd899218f0c9e799f0aba355396ba03ca3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 01:13:01 +0300 Subject: [PATCH 197/215] Added missing code --- dbms/src/Storages/StorageGenerateRandom.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dbms/src/Storages/StorageGenerateRandom.cpp b/dbms/src/Storages/StorageGenerateRandom.cpp index ee0a0b23e62..12c5d1ac09f 100644 --- a/dbms/src/Storages/StorageGenerateRandom.cpp +++ b/dbms/src/Storages/StorageGenerateRandom.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -295,6 +297,14 @@ ColumnPtr fillColumnWithRandomData( fillBufferWithRandomData(reinterpret_cast(column_concrete.getData().data()), limit * sizeof(Decimal128), rng); return column; } + case TypeIndex::FixedString: + { + size_t n = typeid_cast(*type).getN(); + auto column = ColumnFixedString::create(n); + column->getChars().resize(limit * n); + fillBufferWithRandomData(reinterpret_cast(column->getChars().data()), limit * n, rng); + return column; + } case TypeIndex::DateTime64: { auto column = type->createColumn(); From ec893a999866743913fd54344aa7641294c4cef3 Mon Sep 17 00:00:00 2001 From: Alexey 
Milovidov Date: Sun, 8 Mar 2020 01:13:21 +0300 Subject: [PATCH 198/215] Adjusted parameters in test --- .../01087_table_function_generate.sql | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.sql b/dbms/tests/queries/0_stateless/01087_table_function_generate.sql index 6891dd94520..0e856b79972 100644 --- a/dbms/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.sql @@ -10,7 +10,7 @@ SELECT ui32, i32, ui16, i16, ui8, i8 -FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) +FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -19,7 +19,7 @@ FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)') LIMIT 1; SELECT i -FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) +FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -28,7 +28,7 @@ FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))') LIMIT 1; SELECT i -FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -37,7 +37,7 @@ FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))') LIMIT 1; SELECT i -FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -46,7 +46,7 @@ FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')') LIMIT 1; SELECT d, dt, dtm -FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 
10, 10, 1) +FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -55,7 +55,7 @@ FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 , LIMIT 1; SELECT dt64, dts64, dtms64 -FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) +FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -64,7 +64,7 @@ FROM generateRandom('f32 Float32, f64 Float64') LIMIT 1; SELECT f32, f64 -FROM generateRandom('f32 Float32, f64 Float64', 10, 10, 1) +FROM generateRandom('f32 Float32, f64 Float64', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -73,7 +73,7 @@ FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)') LIMIT 1; SELECT d32, d64, d128 -FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) +FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -82,7 +82,7 @@ FROM generateRandom('i Tuple(Int32, Int64)') LIMIT 1; SELECT i -FROM generateRandom('i Tuple(Int32, Int64)', 10, 10, 1) +FROM generateRandom('i Tuple(Int32, Int64)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -91,7 +91,7 @@ FROM generateRandom('i Array(Int8)') LIMIT 1; SELECT i -FROM generateRandom('i Array(Int8)', 10, 10, 1) +FROM generateRandom('i Array(Int8)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -100,7 +100,7 @@ FROM generateRandom('i Array(Nullable(Int32))') LIMIT 1; SELECT i -FROM generateRandom('i Array(Nullable(Int32))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(Int32))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -109,7 +109,7 @@ FROM generateRandom('i Tuple(Int32, Array(Int64))') LIMIT 1; SELECT i -FROM generateRandom('i Tuple(Int32, Array(Int64))', 10, 10, 1) +FROM generateRandom('i Tuple(Int32, Array(Int64))', 1, 10, 10) LIMIT 10; SELECT '-'; 
SELECT @@ -118,7 +118,7 @@ FROM generateRandom('i Nullable(String)', 1) LIMIT 1; SELECT i -FROM generateRandom('i Nullable(String)', 10, 10, 1) +FROM generateRandom('i Nullable(String)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -127,7 +127,7 @@ FROM generateRandom('i Array(String)') LIMIT 1; SELECT i -FROM generateRandom('i Array(String)', 10, 10, 1) +FROM generateRandom('i Array(String)', 1, 10, 10) LIMIT 10; SELECT '-'; @@ -137,7 +137,7 @@ FROM generateRandom('i UUID') LIMIT 1; SELECT i -FROM generateRandom('i UUID', 10, 10, 1) +FROM generateRandom('i UUID', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -146,7 +146,7 @@ FROM generateRandom('i Array(Nullable(UUID))') LIMIT 1; SELECT i -FROM generateRandom('i Array(Nullable(UUID))', 10, 10, 1) +FROM generateRandom('i Array(Nullable(UUID))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -155,7 +155,7 @@ FROM generateRandom('i FixedString(4)') LIMIT 1; SELECT i -FROM generateRandom('i FixedString(4)', 10, 10, 1) +FROM generateRandom('i FixedString(4)', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT @@ -164,12 +164,12 @@ FROM generateRandom('i String') LIMIT 1; SELECT i -FROM generateRandom('i String', 10, 10, 1) +FROM generateRandom('i String', 1, 10, 10) LIMIT 10; SELECT '-'; DROP TABLE IF EXISTS test_table; CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=Memory; -INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 2, 10, 1) +INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 1, 10, 2) LIMIT 10; SELECT * FROM test_table; @@ -180,7 +180,7 @@ SELECT '-'; DROP TABLE IF EXISTS test_table_2; CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)) ENGINE=Memory; -INSERT INTO test_table_2 SELECT * FROM 
generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 3, 5, 10) +INSERT INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime, DateTime64, UUID), h FixedString(2)', 10, 5, 3) LIMIT 10; SELECT * FROM test_table_2; From bfe44d171ed4d23eaf7f277bde7577b3884b2033 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 01:17:45 +0300 Subject: [PATCH 199/215] Updated test --- .../01087_table_function_generate.reference | 364 +++++++++--------- .../01087_table_function_generate.sql | 4 +- 2 files changed, 184 insertions(+), 184 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference index 08081a34bc9..291e5b1689a 100644 --- a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference @@ -1,238 +1,238 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -5443401583997919274 956654340036924402 2956613447 2041372187 46025 26509 247 -34 -14051730854243326159 340055300607421421 579798001 915264595 58925 22498 36 -57 -12126660396637528292 -9182366379883086416 535113873 -1583603936 45790 6066 230 91 -5198178071978083704 -3549936112074464250 3354362520 -1732019372 41330 -27737 13 -47 -9045663333607591872 -5069075924065328373 741246230 -1830932765 29642 -11720 41 7 -18192666371709191624 -5005976579831091773 671021725 1851158245 38613 -27838 57 3 -4333039311970693040 -7294587049092886539 2106347821 2101852759 24058 9107 85 94 -1398111012802844853 1131449717368086026 1687614855 -1193084417 9803 -18141 198 115 -15838944643191192696 6226099517671026657 1300309956 468322781 17216 
-2375 184 -102 -15170414162889419078 3337938833953948518 3603117877 -1297530274 25534 8264 36 16 +2804162938822577320 -2776833771540858 3467776823 1163715250 23903 13655 137 -41 +7885388429666205427 -1363628932535403038 484159052 -308788249 56810 -22227 51 -41 +4357435422797280898 1355609803008819271 4126129912 -852056475 64304 -11401 139 86 +5935810273536892891 -804738887697332962 3109335413 -80126721 258 12889 18 88 +368066018677693974 -4927165984347126295 1015254922 2026080544 44305 21973 16 0 +8124171311239967992 -1179703908046100129 1720727300 -138469036 61343 10573 252 -32 +15657812979985370729 -5733276247123822513 3254757884 -500590428 45913 19153 105 -102 +18371568619324220532 -6793779541583578394 1686821450 -455892108 49050 -28603 248 80 +821735343441964030 3148260644406230976 256251035 -885069056 58858 -29361 58 61 +9558594037060121162 -2907172753635797124 4276198376 1947296644 26801 -13531 204 -66 - Enum8(\'hello\' = 1, \'world\' = 5) +hello world -world +hello world hello hello world -world -world hello -world +hello +hello - Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5))) -['world','world','hello','hello','world','world','world'] +['hello','hello','world','hello',NULL,'world','world','hello','hello','hello'] +['world','hello'] +['hello','hello','world','hello','world',NULL,NULL,NULL] +['world','world','hello','world','world','world',NULL,'hello','world','world'] +[] +['hello','hello','world','hello','hello','world','hello','world'] +['world',NULL,'world','world','hello','hello','world'] ['world'] -['world','hello','world'] -[] -[] -['world','hello','hello','hello','world'] -['hello'] -['world','hello','hello','world','hello'] -['hello','world','hello','hello','world','world'] -['world','hello','world','hello','hello','world','world'] +['world','world'] +[NULL,'hello','hello','world','world','world','hello','world','world','world'] - Nullable(Enum16(\'o\' = -200, \'h\' = 1, \'w\' = 5)) -w -h -h o -w +h +h +h +h w o -w h +\N o - Date DateTime 
DateTime(\'Europe/Moscow\') -2031-03-05 2034-09-09 02:49:47 2061-06-26 03:46:01 -1972-10-06 1999-01-02 11:09:55 2064-03-18 05:47:09 -2004-01-16 2055-12-02 15:29:20 2090-08-18 23:04:46 -2061-07-14 2051-03-20 20:58:44 1973-04-20 21:20:34 -2063-04-13 2048-01-31 01:02:11 2051-02-07 03:11:54 -2106-02-07 2028-08-29 13:37:25 2054-10-20 03:48:21 -2026-11-24 2036-08-09 02:59:19 2065-10-12 06:39:38 -2106-02-07 2068-04-17 13:07:59 2101-04-03 08:48:59 -1997-11-15 1984-11-03 12:39:41 1998-04-01 17:38:08 -2008-09-11 2064-12-25 16:23:42 2031-10-18 03:20:14 +2106-02-07 2050-12-17 02:46:35 2096-02-16 22:18:22 +2106-02-07 2013-10-17 23:35:26 1976-01-24 12:52:48 +2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 +1997-04-11 1972-09-18 23:44:08 2040-07-10 14:46:42 +2103-11-03 2044-11-23 20:57:12 1970-10-09 02:30:14 +2066-11-19 2029-12-10 03:13:55 2106-01-30 21:52:44 +2064-08-14 2016-07-14 11:33:45 2096-12-12 00:40:50 +2046-09-13 2085-07-10 18:51:14 2096-01-15 16:31:33 +2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 +2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 - DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') -1988-05-16 19:00:01.447 2064-03-18 05:47:09.972361 2104-06-20 09:26:44.845879 -2076-04-17 18:22:00.873 1973-04-20 21:20:34.769886 2052-08-01 07:14:05.921510 -1991-04-07 13:55:25.230 2054-10-20 03:48:21.341514 2013-02-07 18:37:45.437737 -2023-06-24 16:54:15.821 2101-04-03 08:48:59.544378 2039-07-05 08:51:02.770005 -2084-03-05 21:04:37.956 2031-10-18 03:20:14.437888 2076-03-16 14:08:20.993528 -1999-01-02 11:09:55.187 2054-01-01 16:49:22.580109 1997-01-09 20:11:35.889758 -2051-03-20 20:58:44.360 1975-02-11 06:38:15.042546 2015-10-21 23:47:13.191963 -2028-08-29 13:37:25.531 1975-02-14 07:25:38.319928 2103-09-16 20:57:23.033927 -2068-04-17 13:07:59.759 2024-03-06 21:42:43.711891 2045-04-22 19:38:11.140126 -2064-12-25 16:23:42.781 2025-08-18 15:44:56.149625 2093-09-26 16:30:56.744858 +1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 
+1978-08-25 17:07:25.427 2034-05-02 20:49:42.148578 2015-08-26 15:26:31.783160 +2037-04-04 10:50:56.898 2055-05-28 11:12:48.819271 2068-12-26 09:58:49.635722 +2041-09-02 07:07:24.891 2051-08-01 14:15:40.218654 2081-10-19 15:55:40.057084 +1976-07-15 23:59:41.974 2075-01-29 20:34:10.425321 1996-12-31 10:51:28.562331 +1974-11-03 08:09:51.992 2010-04-19 04:09:03.451487 1994-05-15 15:42:53.162162 +2061-10-11 20:14:02.729 1981-07-22 10:13:45.729103 2084-05-27 08:59:37.746021 +1989-12-13 02:01:16.532 1992-10-05 07:07:57.973222 2037-10-24 18:53:50.985504 +1992-12-28 12:26:04.030 1971-07-29 09:20:38.230976 1980-03-26 18:49:55.428516 +2051-12-11 10:09:13.162 1982-01-12 03:25:45.754492 2010-05-17 11:01:28.452864 - Float32 Float64 -2.3424705e38 5.304765772621186e307 -4.5936326e37 1.3693852957827914e308 -4.2396088e37 1.1817811347484115e308 -2.6575997e38 5.065787759860024e307 -5.8727575e37 8.815282962741328e307 -5.3163816e37 1.7729324649694315e308 -1.6688205e38 4.2226828718895e307 -1.3370661e38 1.3625030842560206e307 -1.0302116e38 1.5435548915708008e308 -2.8546838e38 1.4784044970034722e308 +-1.3551149e32 1.2262973812461839e235 +1.6263936e-15 -1.4319274895836525e122 +-8.991488e-33 -3.587091060722666e303 +4.6137895e27 9.96990958623199e-254 +9.749564e-13 -3.014080971435583e-286 +0.01518069 nan +-2.5833165e-24 -2.6774132404843463e217 +240769800000 4.559039863342969e-218 +2.0838264e-33 -6.156499824044965e254 +7.317837e-36 -1.6511853645079817e-21 - Decimal32(4) Decimal64(8) Decimal64(8) --133835.3849 87676267830.44260947 10041303591043480341650.6377217747572943 -57979.8001 -68015271123.73929132 -11658496611537681782723.8256877955807880 -53511.3873 -78637963449.98695195 16686303649199763212696.4854950355256776 --94060.4776 90273888640.14252543 7993046724924589483272.0796323974797493 -74124.6230 20114310313.64207198 -4810540869033768101015.4448286464595642 -67102.1725 -60472921957.85611731 1764715777766465744700.9237855716355053 -210634.7821 -20967919098.37725326 
-16938476260073815366594.8118263905360890 -168761.4855 -74544559691.08355371 -9350794626143586522954.2962771754340925 -130030.9956 -54650148153.48939189 -13456138041801265081736.4812607484010998 --69184.9419 38286965773.25360062 11485126437992390872631.7990315807376230 +-18731.5032 81241713112.39967992 -10576027963457111164764.0798899532879521 +65289.5061 -27889310937.24180887 5807515838469365530027.7612329616030438 +-197586.1517 -751754543.85331084 3835903211857734974086.0358362773591932 +183596.0063 8217353434.41964030 13633006218585943284268.9826084812209912 +73041.2674 -88881500366.49430454 -148702703925022894263.3187064158377476 +101454.4494 -27768337.71540858 -634829280961262229789.4961995996929358 +-174012.0101 -13636289325.35403038 -3611949395160064991369.2765012316944096 +138203.8526 13556098030.08819271 134470734594381953531.9736002591779584 +15395.1766 -8047388876.97332962 16804394201271843589306.4234533639925009 +8569.7048 -49271659843.47126295 -14851374957489266092927.8687987539036841 - Tuple(Int32, Int64) -(-1338353849,5443401583997919274) -(579798001,-4395013219466225457) -(535113873,-6320083677072023324) -(-940604776,5198178071978083704) -(741246230,9045663333607591872) -(671021725,-254077702000359992) -(2106347821,4333039311970693040) -(1687614855,1398111012802844853) -(1300309956,-2607799430518358920) -(-691849419,-3276329910820132538) +(-187315032,8124171311239967992) +(652895061,-2788931093724180887) +(-1975861517,-75175454385331084) +(1835960063,821735343441964030) +(730412674,-8888150036649430454) +(1014544494,-2776833771540858) +(-1740120101,-1363628932535403038) +(1382038526,1355609803008819271) +(153951766,-804738887697332962) +(85697048,-4927165984347126295) - Array(Int8) -[27,83,32,84,-29,-27,87] -[-1] -[-35,94,-55] +[-122,110,114,1,124,34,-10,-1,-30,61] +[-56,-18] +[-75,106,19,-19,71,-64,96,-6] +[-18,23,-48,18,30,-123,-37,-51,62,-2] [] -[] -[45,-34,114,-54,-43] -[-6] -[75,64,-66,-115,-30] -[-78,-89,56,66,-109,35] 
-[-71,72,-9,36,-26,13,41] +[-44,-12,-23,5,-102,121,-92,48] +[-97,-69,95,-87,-59,-60,-117] +[-39] +[-96,-17] +[79,-8,-1,34,-95,79,111,-80,-26,58] - Array(Nullable(Int32)) -[2041372187,915264595,-1583603936,-1732019372,-1830932765,1851158245,2101852759] -[-1193084417] -[468322781,-1297530274,-1407994935] +[24276614,-646532,-288866846,-317494603,-94322617,NULL,-841251554,-187367874,2040137193,-1147195228] +[-993679009,-274671221] +[587200591,-1334882399,2107128550,-1581800064,976027584,733011552,-1898440836,-676878904] +[-827190473,NULL,-168837384,-1185631883,1015254922,1720727300,-1040209412,1686821450,256251035,-18768920] [] -[] -[-1321933267,-488197410,104178034,-1735625782,-1618897195] -[-1272422918] -[-153016757,891437888,1950049214,6580109,-1644079134] -[790042546,161321895,1074319928,161583938,515711891,1709750563] -[-149817671,1755521096,815845879,-51580892,1361921510,-1688868851,-1185529559] +[1163715250,-308788249,-852056475,NULL,2026080544,-138469036,-500590428,-455892108] +[-885069056,1947296644,-571843233,16972592,-274748143,-1080380583,1756489194] +[2121012739] +[NULL,NULL] +[NULL,-1874507055,-886731441,821482880,311112585,-127271920,873843770,NULL,1482086359,1352327168] - Tuple(Int32, Array(Int64)) -(-1338353849,[5443401583997919274,-4395013219466225457,-6320083677072023324,5198178071978083704,9045663333607591872,-254077702000359992,4333039311970693040]) -(579798001,[1398111012802844853,-2607799430518358920,-3276329910820132538,956654340036924402,340055300607421421]) -(535113873,[]) -(-940604776,[-9182366379883086416,-3549936112074464250,-5069075924065328373,-5005976579831091773]) -(741246230,[-7294587049092886539]) -(671021725,[1131449717368086026,6226099517671026657,3337938833953948518,-104956130729581604,515805789944032293]) -(2106347821,[2731028582309582302,-8197314279937271385,7439592879615992239,-8726726222408049230,-4046170041070917399,-8162695179087422573,7147712321550951494,-2473105312361834401,2871941532606538254]) 
-(1687614855,[7045950974355040215,8128475529675984757,3862453874713979777,8584893221699499395,-4344095019439049735,7221768832555831190,5712009283210486481,8657278465574644253,-4620821897447975309]) -(1300309956,[-3580736586972265629]) -(-691849419,[7980379733974797651,-548434416689229144]) +(-187315032,[-1179703908046100129,-5733276247123822513,-6793779541583578394,3148260644406230976,-2907172753635797124,2079447297870140215,-5092250158453768456,7390467479849635722]) +(652895061,[7244842965196057084,-80611897324989285,-1326235429680389454,-344141642787805595,-594719979102566112,-1958041690570123100,8363575405000452864]) +(-1975861517,[72896731291475295]) +(1835960063,[-4640199267198194415,9109680350160872938]) +(730412674,[3628163485716526423,-8050946496653339179,3528242107232128335,-546628733788015735,-4538379399781299142,5808200961534384087,808655365001887293,-8844185874661444452,643019962728680518,8250477176286620863]) +(1014544494,[-287233184700985380,1749427670542803376,-6435907283441313909,-398230413002921126,7257457516659393153,1409595563647451721,-946856126400551895,-8238390188888204749,8805947767553724527]) +(-1740120101,[4566168555161068712,2303771336793744574,-2858308093688847501,-674013896865039545,597627259745635607,1493493065813843889]) +(1382038526,[8163231169061670909]) +(153951766,[-8934224507121259258,-5682982952279561296,-7665466011309768105,1158449958889177529,-5943902981558064139]) +(85697048,[2092132020040612180,-7829994932198211729,5992776369613298329,-2557419325779681965,-2080080576758631926,1226592045800496749,1791516122484114661,-6512855691472390097,764717459130233392,4903403636828862838]) - Nullable(String) -;\\Sm\'sH -T -@^1 +fSRH40d6sX - -7-f)$ -9 -)&}y3 -w5>+\'@ -+g+N^g$ +=@ep]Vw~ +b\' +6xGwg|(&Q +^ipx|,=a5N +(U]p\'l` +U6 +\'%Y~t9 +RL,{Xs\\tw - Array(String) -['Y9n(%ub','\\\'f%7','','X0O@','D','4^,~q','a(gmt6#{X'] -['@#q4?Q%\'.'] -['h','#B','{'] 
+['6xGwg|(&Q','^ipx|,=a5N','(U]p\'l`','U6','\'%Y~t9','RL,{Xs\\tw','`xbguF','?/;UTko','k3Igp@',''] +['TlL','UeS}D'] +['0Z3|h','-]','&b!M-e;7','Dj7peUH{T','=D[','_@}a ','_}!','O,9V'] +['r;5qbK&t+','4?a]25n#','_K','4S9,;m','RM nh|E7*','-L[3','','Fm?\'','/D$','.7^Jp5sba$'] [] -[] -['~vYP/4f9.',':,7u.0',';e.<','^O,i','3'] -['!y1/Z\'5D'] -['&- KDN%>[','>-xM./ B','?+//','M,.71QR#_','~N'] -['z9P/%m','7q\'!k','Q%] #.*3','U:&XeP{*',',','s.3'] -['+k.=%','8\'nb=P','-uY ,h8(w','=\'W$','','m<+%l','<~+@ Vw'] +['CB','TaI&szh','Hnc?lApSP','2O"ms26O>','bX?}ix [','UlI+1','U','NQTpY#'] +['8+>','# ;M<:{M','tb:^UG','?AaqQ?$Ee\'','PbE.6x]^'] - UUID -4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf -a84a8d61-8560-a0e4-4823-a36a537a8578 -7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8 -3c220c4b-fbd0-efb0-1367-168f02acd8b5 -dbcf3c3c-127c-bc78-d288-234e0a942946 -0d46b80d-bebc-93f2-04b8-1e7e84b1abed -8091ae6d-8194-3db0-cebc-17ea18786406 -b9a7064d-de99-e30b-ba87-32d2cd3e2dc3 -9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a -56678c86-2703-2fe1-2e52-bdaf2fce8366 +26ea6355-f4d5-cca8-6d6e-86ff8a3abef3 +3c78b86e-2b89-3682-5260-3bfe9847dfdb +051ba218-092d-1e16-70be-dac38ce0b4f8 +d94bb9f2-5787-4e69-fef4-ec529188a474 +0b6763f0-ed3d-ebfe-84a6-f2a21322b24a +fff6227c-0172-6e86-ed13-6ab5eec83de2 +12d017ee-fa60-c047-f4d4-fe3ecddb851e +bb9f30a4-799a-05e9-efa0-d98bc4c5a95f +b06f4fa1-22ff-f84f-a1b7-a5807d983ae6 +2bb0de60-3a2c-ffc0-d7a7-a5c88ed8177c - Array(Nullable(UUID)) -['4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf','a84a8d61-8560-a0e4-4823-a36a537a8578','7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8','3c220c4b-fbd0-efb0-1367-168f02acd8b5','dbcf3c3c-127c-bc78-d288-234e0a942946','0d46b80d-bebc-93f2-04b8-1e7e84b1abed','8091ae6d-8194-3db0-cebc-17ea18786406'] -['b9a7064d-de99-e30b-ba87-32d2cd3e2dc3'] -['9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a','56678c86-2703-2fe1-2e52-bdaf2fce8366','fe8b1ef4-86dd-23dc-0728-82a212b42c25'] 
+['fff6227c-0172-6e86-ed13-6ab5eec83de2','12d017ee-fa60-c047-f4d4-fe3ecddb851e','bb9f30a4-799a-05e9-efa0-d98bc4c5a95f','b06f4fa1-22ff-f84f-a1b7-a5807d983ae6','2bb0de60-3a2c-ffc0-d7a7-a5c88ed8177c','1cdbae4c-ceb2-1337-b954-b175f5efbef8','66903704-3c83-8f8a-648a-da4ac1ffa9fc','fee19be8-0f46-149b-ed98-43e7455ce2b2','fb395cff-cd36-a665-f7bf-215478c38920','e4d3a374-e229-98a4-7411-6384cb3eeb00'] +[NULL,'7e6c1603-68b1-e5ea-3259-d377a92d3557'] +['90454ad1-294d-55d5-30f6-d580cb258d4f','f869fc10-128b-3389-c104-6ffb3415cc3a','509ae000-5856-d7d7-0b38-ebc520a6be3d','854323ce-3546-f49c-08ec-773fcded8c46','727f9547-6808-2cbf-fc03-8af0ba318fdc','18473732-8ef1-61b0-a6af-106cb321978b','fa79338c-9e34-2b5a-64b7-ab28e4f8c281',NULL] +['8dab5bc5-a641-5a33-7a35-02d6b3af106f','3f5e4a1b-baef-24a8-1ff8-a3d32d5164be','d8553fee-c90d-6373-f6a5-6bee8faebb47','084b32d3-8d3a-8d17-14b9-f407ce57dfb1','71499f7f-6351-4ffd-8403-42257572ad06','b121fd1d-58c6-abb0-959e-cb46ae0d0257','1013a419-91a7-c1b9-ad83-03bafcd8abf5','1d08befc-d5bd-b954-9356-451821377f6f','532a9e5b-6927-da99-dc82-38ac492edd53','e32211bc-761c-b20a-1105-baf786421e6d'] [] -[] -['25e69006-a800-55de-8e3d-4a17f81a19a7','673ebe4e-af09-61af-86e4-70bca5481db2','c7d91dc9-0123-e8e9-8eb8-47fb80b35b93','6331c67c-7aba-2446-ddad-c3d24c8a985f','27db2f9b-92b4-220e-61c8-3f14833fe7d7'] -['70ce256d-7fca-cf75-359a-3155bba86b81'] -['7723aae6-820e-b583-c3b6-ac9887cab3f9','6438e07d-9a7a-4f96-4f45-222df77ea2d1','7824d4e5-6e37-2a1d-bfdf-8af768445673','ce4eab0b-b346-0363-6ec0-0116104b4d53','f86391cd-c2a3-d2a8-ea85-27ed3508504c'] -['b87d39f3-e56c-7128-7a66-4e516e2ce1c0','af25bac3-d662-673a-3516-022e687643ed','657c9997-3b29-f51c-7193-6a3b9a0c18eb','19bb38b5-6f97-a81c-2d56-57f189119a1a','0c7e416d-c669-dc04-1130-ff950fbbf44b','9705bc44-8d08-c734-6f47-8edcc1608a81'] 
-['7c20103f-659a-f845-399c-abdc8dc88ba0','bf479e85-1a0f-66c3-66fa-f6029e4ee2a8','f14af1a0-823c-b414-eb8e-e6b05b019868','ce353e45-2a9e-492c-1c54-d50459160ecf','1e232279-77ad-db7e-82f6-b4b3e30cdc2e','991111af-30a3-1ff7-e15a-023dfa0a8a6e','a749ef39-dc02-d05c-e8b2-129a7cccfd24'] +['18dcbe6b-ea0d-04e5-a59d-b0415468382f','0a9cd27a-38ff-3230-440c-63a0db8af976','dff6ee08-7533-ac1f-3f4b-a6450dffffb3','20fc45e0-8b0d-3652-1ef9-6a69cfab4ad8','09f997b7-ff78-73cf-d2e1-014279fc877b','3e96646a-f80c-c4b5-b6ea-ac9de22a1c1d','b9804d77-da87-b21e-33e8-a148791c24c3','4763e2bd-c2ec-9d95-8e8d-78d0fdcd6100'] +['150955fb-d228-6700-f1ee-9c8579cfdf10',NULL,NULL,'706b1793-0b9b-b007-e892-8bb3f13ed5aa','d48ec551-5c15-5f80-09cf-5f3a09b4232e','11f16f65-b07b-8ca6-9037-c400f4cee06f','fd5cb198-44c1-631c-545a-b493d3456ca4'] +['8978d7e4-5b0b-ce2d-c714-66d3728f8226'] +['716309e3-bc54-8fae-d159-077e7f58328a','80413bd9-30b3-e071-8079-0f198e1952bf'] +['a55486af-06c3-4d55-fb6e-54559812b2c0','9f93002e-efb6-c6fd-3928-154e7bbe640b','1f197019-cde5-8fe3-16c0-45d857e32f5d','f2532c84-e28e-d4b8-89a5-335907448f4e','09c22b07-180f-4acb-d898-8d6c84667cf3','d11f8791-0962-9a60-d28c-2f3763793435','9aa152ff-1f38-4172-909e-a75c2f6cd244','7a5e4b17-0178-0a6d-7955-f6e5488d4da2','2bf6aea1-9cb5-1726-8dac-c7144c65e4a9','bfddcefd-68fa-85d8-a550-87605cd540cc'] - FixedString(4) -G -- -5S - T -W^ --r -K -@ -8B -#H +A8CCD5F4 +5563EA26 +F3BE3A8A +FF866E6D +8236892B +6EB8783C +DBDF4798 +FE3B6052 +161E2D09 +18A21B05 - String -;\\Sm\'sH -T -@^1 +fSRH40d6sX - -7-f)$ -9 -)&}y3 -w5>+\'@ -+g+N^g$ +=@ep]Vw~ +b\' +6xGwg|(&Q +^ipx|,=a5N +(U]p\'l` +U6 +\'%Y~t9 +RL,{Xs\\tw - -[27] -119308.4417 ('1998-04-01 17:38:08.539','4b8ad8e6-77fe-a02a-c301-c6c33e91d8cf') -[83] 46832.2781 ('1970-03-18 06:48:29.214','a84a8d61-8560-a0e4-4823-a36a537a8578') -[32] -129753.0274 ('1995-01-14 03:15:46.162','7d88a6e0-3d74-cfc0-fc79-55ad1aebc5c8') -[] -140799.4935 ('2004-01-17 09:12:08.895','3c220c4b-fbd0-efb0-1367-168f02acd8b5') -[] -132193.3267 ('1986-05-06 
01:18:11.938','dbcf3c3c-127c-bc78-d288-234e0a942946') -[84] -48819.7410 ('2101-05-10 09:27:05.563','0d46b80d-bebc-93f2-04b8-1e7e84b1abed') -[-29] 10417.8034 ('1995-11-08 18:51:19.096','8091ae6d-8194-3db0-cebc-17ea18786406') -[-27] -173562.5782 ('2013-02-27 03:31:50.404','b9a7064d-de99-e30b-ba87-32d2cd3e2dc3') -[] -161889.7195 ('2068-07-13 23:42:17.445','9ac46bba-2ba7-4ff5-0fb3-b785f4f0de0a') -[87] -127242.2918 ('2033-08-04 15:06:45.865','56678c86-2703-2fe1-2e52-bdaf2fce8366') +[-122] -9432.2617 ('2001-08-23 08:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') +[110] 31562.7502 ('2045-02-27 11:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') +[114] -84125.1554 ('2023-06-06 06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') +[1] -18736.7874 ('1977-03-10 04:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') +[] 204013.7193 ('2026-05-05 05:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') +[124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') +[34,-10] -99367.9009 ('2031-05-08 10:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') +[] -27467.1221 ('2021-03-08 03:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') +[-1] 58720.0591 ('1976-06-07 23:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') +[-30,61] -133488.2399 ('2048-05-14 09:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') - -[] 3608695403 ZL 109414.2847 h 2.2986075276244747e306 ('1985-05-10','2009-10-28 20:06:11','1993-01-03 17:51:52.981','b13ff007-c245-d737-85b2-1fa003e57127') . 
-[85] 4204173796 ], -199466.5471 h 1.1231803213254798e308 ('2075-04-03','1983-02-12 23:57:05','2060-06-06 20:15:08.751','a2f2cbf4-b11b-6976-7b91-14b6964acbe2') * -[-94,100] 32713522 8D$ 102255.5602 h 1.738807291208415e308 ('2029-07-12','2056-08-07 23:18:32','2081-01-25 13:13:30.589','445a77b5-0a27-3485-8dd8-c7cc35d2692f') -[] 4117557956 0b>+ 65942.4942 w 5.949505844751135e307 ('2048-03-05','2074-01-22 02:32:44','2073-12-04 05:05:06.955','c12095e6-b82c-d81c-4629-acd80e02b080')  -[] 1511604199 Il= -96352.6064 o 1.6472659147355216e308 ('2024-06-01','2024-12-26 00:54:40','2038-04-14 05:21:44.387','ebbbe70a-0321-ff18-89de-2bc9a9e4c454') Q -[-18] 2278197196 ~ 193977.7666 o 1.213689191969361e308 ('2060-10-04','1992-10-24 16:31:53','1983-06-10 08:51:48.294','805b0a62-9ada-a47e-2d5e-63cb5923549c') \t -[] 3761265784 N"(6 -59230.0369 o 1.2102282609858645e308 ('2106-02-07','2060-07-09 20:14:59','2007-03-17 04:51:09.288','429df3a4-ff18-28d5-9ad8-dcdd78e8b1ae') Y| -[] 66606254 6x&+ 130635.2269 o 1.1958868988757417e308 ('2088-10-07','2070-03-01 21:30:45','1978-05-22 14:28:52.523','d63c5cbb-9418-ce59-000c-056f88157bfa') у -[-27,-12] 4089193163 )+.8 -111081.7896 o 1.464035857434812e308 ('2106-02-07','2007-04-27 23:04:36','1987-07-21 04:32:01.821','2bc4860a-7214-300a-851e-b61011c346ef') # -[14,-43] 3638976325 #" 116961.4294 o 9.260305126207595e307 ('2042-06-11','2087-12-28 00:21:16','2071-04-01 21:44:13.058','c00d218a-913f-b657-1ff9-99927741f7ab') Fx +[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743 +[55] 3047524030 li&lF 93462.3661 h 2.8979254388809897e54 ('1976-01-10','1987-07-14 00:25:51','2021-11-19 04:44:08.986','486e5b26-5fe8-fe3e-12ef-09aee40643e0') 9E75 +[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2106-02-07','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43 +[100,-42] 3999367674 -112975.9852 h 
2.658098863752086e-160 ('2081-05-13','2071-08-07 13:34:33','1980-11-11 12:00:44.669','9754e8ac-5145-befb-63d9-a12dd1cf1f3a') DF63 +[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2106-02-07','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D +[48,-120] 3848918261 1 Date: Sun, 8 Mar 2020 01:59:56 +0300 Subject: [PATCH 200/215] Update JEMalloc just in case --- contrib/jemalloc | 2 +- .../internal/jemalloc_internal_defs.h | 6 +++++ .../jemalloc/internal/jemalloc_preamble.h | 22 +++++++++++++++++++ .../jemalloc/jemalloc_macros.h | 18 ++++++++++----- .../internal/jemalloc_internal_defs.h | 6 +++++ .../jemalloc/internal/jemalloc_preamble.h | 22 +++++++++++++++++++ .../jemalloc/jemalloc_macros.h | 17 +++++++++----- 7 files changed, 81 insertions(+), 12 deletions(-) diff --git a/contrib/jemalloc b/contrib/jemalloc index cd2931ad9bb..ea6b3e973b4 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff +Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756 diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h index 9c46a3a9320..5e598348e72 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h @@ -379,4 +379,10 @@ */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE +/* + * popcount*() functions to use for bitmapping. 
+ */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h index c150785fb4a..d79551e1f25 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h @@ -191,4 +191,26 @@ static const bool have_background_thread = #endif ; +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 + +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. 
+ */ + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h index 79b13337fbb..34235894285 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h @@ -4,13 +4,13 @@ #include #include -#define JEMALLOC_VERSION "5.1.0-97-gcd2931ad9bbd78208565716ab102e86d858c2fff" +#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" #define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 1 -#define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 97 -#define JEMALLOC_VERSION_GID "cd2931ad9bbd78208565716ab102e86d858c2fff" -#define JEMALLOC_VERSION_GID_IDENT cd2931ad9bbd78208565716ab102e86d858c2fff +#define JEMALLOC_VERSION_MINOR 2 +#define JEMALLOC_VERSION_BUGFIX 1 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" +#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 @@ -69,6 +69,7 @@ # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -96,6 +97,11 @@ # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h 
b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h index 43936e8eba0..7c21fa79397 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h @@ -370,4 +370,10 @@ */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h index c150785fb4a..d79551e1f25 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h @@ -191,4 +191,26 @@ static const bool have_background_thread = #endif ; +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 + +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. 
+ */ + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h index 7432f1cda53..34235894285 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h @@ -4,12 +4,13 @@ #include #include -#define JEMALLOC_VERSION "5.1.0-56-g41b7372eadee941b9164751b8d4963f915d3ceae" +#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" #define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 1 -#define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 56 -#define JEMALLOC_VERSION_GID "41b7372eadee941b9164751b8d4963f915d3ceae" +#define JEMALLOC_VERSION_MINOR 2 +#define JEMALLOC_VERSION_BUGFIX 1 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" +#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 @@ -68,6 +69,7 @@ # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -95,6 +97,11 @@ # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) From 20eeb4651ec8ab5837d662785b01e8ca6794dd1f Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Sat, 7 Mar 2020 19:35:27 -0400 Subject: [PATCH 201/215] Update adopters.md Fixed 
https://lifestreet.com/ URL --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index d1c7a35cead..8d103cb89ea 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -36,7 +36,7 @@ | [Integros](https://integros.com) | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| [LifeStreet](https://cloudflare.com) | Ad network | Main product | 60 servers in 3 replicas | 2-2.5 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| [LifeStreet](https://lifestreet.com/) | Ad network | Main product | 60 servers in 3 replicas | 2-2.5 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | | [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | | [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | From 6f8a8bb21e492ad3ff0d1dd119074435d087abc2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 
04:24:13 +0300 Subject: [PATCH 202/215] Updated test --- .../01087_storage_generate.reference | 200 +++++++++--------- .../0_stateless/01087_storage_generate.sql | 2 +- .../01087_table_function_generate.reference | 24 +-- .../01087_table_function_generate.sql | 4 +- 4 files changed, 115 insertions(+), 115 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.reference b/dbms/tests/queries/0_stateless/01087_storage_generate.reference index 33e5fdd18ee..3680d8d943d 100644 --- a/dbms/tests/queries/0_stateless/01087_storage_generate.reference +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.reference @@ -1,103 +1,103 @@ 100 - -[] -183162.1041 ('2074-01-15 19:36:42.413','0345f8ad-8936-8cc9-9ff2-394f225fc318') -[3] -39049.0845 ('2045-07-04 15:01:09.380','f79d47d8-4030-9916-54b9-495a5ccc7202') -[-95,-104] -14324.1685 ('2023-10-02 06:05:53.887','ea94157b-737b-a272-acd5-c7ab9c6f07c3') -[] -182420.0194 ('2002-01-08 01:42:50.396','ac579c0f-8523-144f-aa4c-c9587cc27144') -[] 127157.2100 ('2006-11-23 14:25:39.542','d07c5204-ef95-6804-83df-01dedaf32522') -[28] -198600.4267 ('2060-09-25 02:57:41.504','b13ff007-c245-d737-85b2-1fa003e57127') -[] -87232.0739 ('2027-05-12 20:26:59.405','a2f2cbf4-b11b-6976-7b91-14b6964acbe2') -[] -110349.8987 ('2042-11-01 10:51:30.039','445a77b5-0a27-3485-8dd8-c7cc35d2692f') -[32,73] 123253.5669 ('2016-06-21 00:23:02.917','c12095e6-b82c-d81c-4629-acd80e02b080') -[-109,85] 34320.8302 ('2080-12-31 16:49:32.509','ebbbe70a-0321-ff18-89de-2bc9a9e4c454') -[68,76] 160458.5593 ('2030-05-23 03:33:29.681','805b0a62-9ada-a47e-2d5e-63cb5923549c') -[69] -189322.2887 ('2084-06-19 03:31:58.508','429df3a4-ff18-28d5-9ad8-dcdd78e8b1ae') -[] 189158.4731 ('1985-12-21 16:36:51.092','d63c5cbb-9418-ce59-000c-056f88157bfa') -[-120] 114890.5905 ('2089-02-19 22:30:18.216','2bc4860a-7214-300a-851e-b61011c346ef') -[] 54493.1631 ('2062-08-18 04:57:01.348','c00d218a-913f-b657-1ff9-99927741f7ab') -[103] 88891.6006 ('2071-05-23 
21:46:45.644','036d2746-f7aa-b5a4-b716-b8e8a5e041da') -[28] -41173.9863 ('2044-03-11 10:06:47.659','6bf54ef0-9bad-54d4-5ca3-02d79883b697') -[] -64809.0652 ('2010-11-02 23:46:46.150','ae5cafb4-fe3c-71a5-9a76-0314c44180de') -[125,74] 28139.7661 ('1989-07-19 22:10:13.477','ee9b8173-4426-8615-97eb-a904266847e5') -[-101] -29032.2527 ('2052-08-13 08:55:15.045','ab8ee358-ff53-de7e-f012-cd0eed754ff2') -[82] 170334.6151 ('2034-02-23 18:50:46.847','f1402791-1d23-e56d-25f0-5a51a3cb245a') -[27,56] 168648.5067 ('2004-05-31 19:47:19.448','5019365e-f74d-b31e-aacb-63d8189e3e3e') -[-74] 89577.4738 ('1997-01-15 09:05:57.691','d144325c-24a9-411e-cc10-78b0637f75a7') -[100] 178532.5772 ('2055-04-22 16:20:11.498','41439c27-fba1-1b66-13a7-cf79fded4d9a') -[32] 123101.7871 ('2103-02-18 15:53:42.748','25b804d3-a73e-ed14-e6e1-eafb0d9473cd') -[-115,-85] -208371.1662 ('2039-10-22 18:06:50.235','41fba85a-5080-48bb-e18d-a8af04a890aa') -[-12,101] -7791.5577 ('2073-07-27 11:35:45.239','c00be55f-59ac-762c-af0a-9e33cf30a1f4') -[-127] -18602.1918 ('2024-02-05 19:54:00.798','ea85cbed-66f2-197b-4e63-dfbdcd306cce') -[-78,125] -112158.3556 ('2016-08-12 06:46:17.173','15809e25-b003-010b-c63c-9e880568736a') -[] 151191.1081 ('1982-11-12 17:59:10.171','d6bbbe2c-fca0-53c8-22a6-de9e0715d3cc') -[46] 58689.9611 ('2085-04-13 09:13:07.230','fe4be382-eb78-4cf9-fa57-c6eccf955419') -[-4,42] -88292.1046 ('1980-03-06 08:29:12.503','2633522e-ff9c-b837-1b9b-6559875c13b0') -[-2] 64983.6649 ('2034-07-07 11:20:23.903','d19f5c4d-e444-2e5b-a55d-5280d1760b94') -[-31,-110] -25638.6649 ('2025-05-17 21:45:25.519','3654a15e-bfa3-6075-b5b8-07e25310de1f') -[25] -140469.2476 ('2083-12-13 23:55:25.450','940f7441-ae40-d810-f6c3-e2fff468050c') -[-99] 128186.7318 ('1995-09-15 04:26:33.803','4cfd264f-ff00-4190-929c-b675826607d3') -[] -52961.0340 ('2046-03-19 14:15:50.245','314de821-308b-c61c-e256-9f6afed5d4f3') -[] -155852.9334 ('2052-01-29 06:31:08.957','5be2ccd2-b5bb-921d-5b5e-4a0e22385de7') -[-74,81] 29366.0091 ('1978-03-05 
19:24:49.193','ab9b6a39-89ac-9280-c76f-60d598ce65c6') -[9] 56134.8951 ('2104-04-03 10:27:33.053','339a7f6d-0e0b-e039-78c0-2d045457d821') -[-61] 68841.1188 ('2059-07-26 12:14:33.197','c817bbb3-d091-b73c-1b9a-53f8a03bffb6') -[89] 168034.0459 ('2028-05-29 08:02:02.393','bc217a73-e802-1772-80b5-d8c827295799') -[124] 11648.6762 ('2084-12-25 12:10:35.676','77390177-1dd6-a5c0-dd35-4f85e38bcb2c') -[-47,-125] -120893.6705 ('2012-10-18 22:52:57.524','472814b2-4033-c5a5-7d86-fb36079e88fb') -[35] 153250.6252 ('2006-11-06 00:05:25.456','de0d6ed9-eca6-e01e-eb1c-c46c8ad6e33e') -[-43,70] -141086.3184 ('2013-02-03 23:07:11.759','65d48b24-cdc0-f7db-cb16-d0ad03279bcc') -[120,-57] -93351.1404 ('2000-02-03 14:39:00.466','6991722b-90dc-e9dd-c5e7-f28bd1d4f0d8') -[34,43] 187780.4567 ('2014-02-10 05:22:19.250','3db77bc5-d877-b22e-6667-955bf36d2e08') -[73] -90148.5697 ('2014-10-05 18:34:31.419','5a0f919e-38c9-0a68-e805-977db04d0acb') -[] -179121.0029 ('2077-01-23 07:57:55.365','fcf79336-a6dc-44fd-8c78-7e74e07b60fa') -[-69,120] 119321.8003 ('1989-07-01 13:11:35.185','92f6a362-250c-cfcd-acd7-99399cbf88ad') -[] 208864.8324 ('1991-02-17 03:04:00.682','b0dc8e88-ea6f-c2da-c116-3e4873dc8d54') -[22,-14] -127735.4391 ('2036-08-10 08:33:03.806','5ab1ab2b-913d-ff8a-6f8f-86387e77ed5c') -[83,-70] -142476.9847 ('2074-11-22 19:27:13.085','51b9d30a-3b10-265c-4086-1ac35b634ec7') -[] -128052.2443 ('2088-01-02 10:58:36.999','745e8226-d906-7fb3-33f4-9a079037bdcd') -[12,-116] -88390.1399 ('2074-02-18 17:46:45.208','fb5f827e-1809-6cab-2855-d45df20ecd92') -[] -84110.2097 ('2039-03-24 17:08:15.660','88e18c93-6276-d176-dad1-7db72e340ca7') -[] 202866.8175 ('2104-01-25 13:42:41.758','10faa33e-d383-c6b3-399d-44c06ebb00f5') -[-21] 151775.1601 ('1995-10-20 15:44:53.296','7ccaf135-787d-2ac0-09c0-7545c798ee14') -[-19] -15498.5738 ('2097-08-02 18:34:16.406','cf97f268-02c0-24fc-bbf3-c7b272632c14') -[116] -72670.9713 ('2020-08-31 18:10:41.904','f9cdd931-e2ed-0584-d4b9-67a6df717a4c') -[] 124014.7040 ('1975-07-23 
11:17:25.176','ccf33ba5-8fd8-c8b5-ccc4-a9cb892d4b55') -[-56] -204745.8115 ('2037-11-13 01:03:12.923','6dc83c7b-7782-57b4-a293-18ca8aba331d') -[] -28535.2534 ('2105-04-07 20:51:09.990','0d9f3a2f-d4f2-a330-7b6e-001ea3aacbde') -[-124,-128] -31519.7583 ('1993-02-14 23:06:10.338','a073dafb-6f1f-273e-acf9-88200f82af6d') -[46] -154950.9257 ('2032-06-04 23:16:16.051','e6aa3b80-9f53-6c10-0cc8-622622f964b4') -[] 206914.3454 ('2003-10-05 10:44:30.786','137ed3be-2d40-d1c1-7aff-b32f7e21c0da') -[-47] 91521.1349 ('2006-09-01 04:06:32.496','52e4ef43-9379-4864-8f63-8e205875a096') -[121] 161456.7813 ('2027-11-03 10:20:30.670','f1abbd17-f399-657c-1a47-1dd627578b53') -[99] -127959.4741 ('2084-08-18 06:04:41.942','2a3b92c3-75ed-bd20-5a77-b77cbe1ce479') -[-97] 82020.4570 ('2061-10-25 06:16:50.814','8625d479-6e81-318f-5077-a9deb13c50e0') -[71] -121599.1388 ('2010-04-02 11:05:18.877','0ec279cf-c9b2-dc65-40c0-2d0f390b1102') -[] 98975.6469 ('2049-03-06 08:56:25.010','845340d7-a1df-9ddf-b737-9eb90ca6344c') -[92,81] 135864.7854 ('2040-12-30 21:17:28.184','ea224755-198e-c9ae-c59b-0517a7459d7c') -[81] -154620.5037 ('1984-06-07 02:36:28.734','52d3b727-043f-1d43-6f48-51e8abdc2127') -[38] 33379.3375 ('2057-10-19 17:03:44.317','e709bfc2-0915-9e4e-4d01-c10b24795e30') -[] 7491.1071 ('1971-04-29 09:30:25.245','26bcd2ab-6d0b-fc20-27eb-084c4248af7d') -[-122] -135635.3813 ('2010-03-04 23:05:25.982','66ed96eb-fc6e-653e-0353-ac4477ea60a6') -[] -174748.4115 ('2020-10-28 07:39:33.461','e17fa9ba-2595-c0f9-2f85-d6bbdc2f6f6a') -[72,106] 25749.2190 ('2008-06-15 04:03:39.682','0e47b616-da80-091e-664d-2a35bc57a480') -[-84,97] 109277.9244 ('1998-10-27 10:40:00.442','9488bce4-46d7-8249-78aa-540b8be43937') -[-120,-107] -64113.5210 ('2091-12-03 06:46:11.903','325fcb1c-8552-b434-b349-732d62be19f1') -[] -66141.6000 ('2085-10-05 08:08:11.830','4c66022b-75b9-b0a8-3897-b9de8ea851f1') -[-34,-102] -142314.4437 ('2038-01-27 12:04:29.739','91e9eb11-5679-02ef-6ea6-2c9fdcb12ed9') -[103] 96187.7213 ('1978-10-07 
13:57:43.616','7c02e8e3-9e98-5043-8029-34e32ad1af61') -[] -21344.8423 ('2085-01-13 00:10:52.538','52cb36f8-987a-f414-7e0f-93ddccc5c377') -[16] -95098.4107 ('2074-02-19 18:56:00.878','821e4b10-f70a-4bee-ef0c-ac12eab994f3') -[21,86] 27954.7748 ('2033-10-18 03:15:38.815','bfe4d932-c5ed-45c0-9f50-72a6394d49af') -[] 149788.2085 ('2073-09-10 20:42:48.693','5e7d825e-5c88-7c89-4235-0e7934739a12') -[33,116] -148302.8732 ('2044-08-10 22:05:18.943','a53d4b07-5529-7472-3cca-3770f52b3648') -[] -98384.4505 ('2070-01-28 05:17:35.804','4833b839-51a3-87b8-7709-30676f697aa4') -[] -75597.1523 ('2075-02-04 19:24:01.477','d64becff-5c08-b0a0-e7f1-b86eaf5f1913') -[] 179005.6113 ('2100-05-27 21:54:12.965','d87ce81c-c471-b6b3-93b7-05225cb577be') -[] -134366.9213 ('2054-11-16 18:19:00.801','c348fced-6700-f0f6-cda0-14aef7ea6948') -[10] 82182.0343 ('2017-03-04 09:41:21.249','e19f0022-49ab-2d41-872d-be35669a79bc') -[-28] 90333.8564 ('2032-11-19 01:23:37.107','e2586be2-e968-21d0-d1b1-b438c55a59a3') -[-73] 185647.6735 ('2001-01-23 16:20:26.442','24b04f39-f272-24ff-538d-41e636a1a37a') -[-79,7] -87628.8007 ('2005-03-25 04:17:49.969','38a10e9d-7086-f358-8e50-c72b278bec42') -[119,-55] -208591.8591 ('1976-11-14 15:17:57.569','d0935dc7-7f56-71db-67f2-1b4e52770ba9') -[-108,-124] 181408.0349 ('2056-10-27 05:07:32.393','29d655c1-c35a-1245-25e2-65b4f233cb9c') +[] -54259.6828 ('2088-03-01 16:26:24.094','d3c2a216-a98c-d56c-7bf7-62de9f264cf4') +[88] 34528.4014 ('2031-12-09 00:40:39.898','9ef777c8-de0e-d25e-e16c-5b624f88523c') +[-1] 121968.7945 ('2060-02-05 09:18:12.011','7655e515-d2ca-2f06-0950-e4f44f69aca7') +[-103,75] -135033.4349 ('2038-12-19 20:38:58.695','86b57d15-292d-2517-9acf-47cd053e7a3a') +[110] -202668.6900 ('2009-06-18 01:53:29.808','bc630f78-7d58-0c46-dd4b-27fc35625e96') +[-22,2] 168636.9728 ('2074-09-03 09:20:20.936','7624ce27-9bff-4e9d-3f18-6851a97dd0ca') +[-22,-62] -75192.4989 ('2085-10-11 21:51:12.855','a4c4d0ed-f448-244e-1723-ca1bba816f2b') +[-2,-90] 133592.5064 ('2010-10-28 
21:18:04.633','8ba9103b-f90c-b49b-38c1-223ae5f42bf7') +[-94,80] 197330.6359 ('2024-03-30 22:08:45.772','83442013-3677-5097-065d-72dfbe8a3506') +[23] 167557.6237 ('2078-07-25 21:54:42.480','be14d98e-5b24-54ee-c959-d24fa9a58fdd') +[46,-10,-63] 185107.1979 ('2040-10-07 06:06:53.504','5ed1fe6a-9313-41d7-4bf9-3948e961509f') +[-107,68] -163781.3045 ('2021-12-21 19:18:58.933','7b634f19-0863-829e-484b-be288aab54a1') +[-35,-116,73] -203577.5379 ('2093-08-01 20:21:09.407','d371bad4-b098-ffdd-f84c-6a02390c2939') +[61] 152284.9386 ('2089-12-20 19:21:33.149','9e8426c1-278a-4d9c-4076-364a95b065e3') +[75] 170968.4171 ('2020-07-17 15:45:31.975','47397a81-bda7-8bd9-59f7-d60e2204fe99') +[-115,93] -173740.5652 ('2098-04-25 22:10:33.327','117e31dd-102e-ee6c-0dbd-0a4203c18ca5') +[-20,4,21] 63834.8685 ('2000-07-08 18:09:40.271','10b0fa48-55a3-755a-4a44-36315ae04c1c') +[-110,117,91] -160640.1506 ('1998-04-18 10:58:04.479','6dfa3a8e-6e65-543c-5f50-1ff45835aa5a') +[62] 63817.7977 ('2043-01-24 02:07:18.972','98b8ef31-4f65-2f8b-1ea7-b1473900099e') +[-2] -175477.0173 ('2007-01-16 07:46:14.781','ec92f616-6e1f-003a-54c6-c5f9118d2f1b') +[] 197663.3035 ('2046-06-30 17:04:56.788','fb3244a4-8af2-104f-2a6f-25a7b7b9a112') +[-24] -174299.4691 ('2058-02-23 14:50:58.839','d63ee868-fa93-bf8b-0264-8ebbceb13e3b') +[95,38] -65083.7371 ('2015-03-10 13:33:16.429','47bd199c-f99e-51ea-84e9-b65cce9d167c') +[91,110,72] 130908.9643 ('2036-03-16 15:17:53.679','0dd4ca31-1e09-d7e0-f3df-60cad3cfa805') +[] 208972.3779 ('2034-03-05 22:29:21.994','1069d77c-dfd2-912e-60b8-3c5b964f7e11') +[-32] 167938.5050 ('2093-09-10 20:39:39.050','9d1025b6-2d0c-1d84-dafd-02668eb29270') +[] 153744.6987 ('2088-10-02 11:02:11.024','a88e6cb7-2210-5ce5-6bcf-24afc0eca5b6') +[67] -74220.6650 ('2074-12-30 18:43:40.817','68096065-18c8-8aca-fd21-15330ead669d') +[6] 66759.8938 ('2091-09-01 19:07:18.219','bb14f4cc-0b54-9a8c-e835-71333b28c03b') +[-28,-82,9] 168625.3131 ('2002-03-20 21:02:30.321','405bb877-6e28-8b91-cb62-bd82a3fa797c') +[] 
-19760.1670 ('2044-11-08 07:52:03.325','13769348-9e58-0e75-3972-8bbadc150715') +[] 160663.7797 ('2025-04-12 13:17:53.501','e6370321-94f5-97e6-0348-a84e72ff5b42') +[-17,18] 99105.9856 ('1972-05-01 12:23:11.688','02618b9e-97cd-4698-d2e8-3f52f4c5a09a') +[86,77] -116990.3914 ('1981-12-31 05:06:54.198','3ac42bb4-8652-b1a8-10bb-98f0337261f8') +[-109,69,-63] -151527.3587 ('2001-01-17 11:19:56.504','77fe7ee2-f279-2855-bfd2-a7d7cee678cc') +[] -57762.3928 ('1978-08-16 18:47:37.660','ab9a110a-fd8d-3c4c-5a49-34c2005536ce') +[-77] 107274.6407 ('2017-01-12 12:03:02.657','c1ad4f17-cc54-45f3-9410-9c1011653f6d') +[] 107133.6410 ('2050-10-05 06:29:27.154','36e576aa-c77f-994e-1925-4a4c40da3a0f') +[] 46672.2176 ('2094-01-21 20:25:39.144','e9ba850d-604e-bc7d-417c-1078e89d4615') +[-87,-122,-65] -86258.4663 ('2081-06-17 03:37:45.498','64795221-9719-7937-b4d2-be5f30065ece') +[-53] -48672.1424 ('1992-06-27 17:27:23.602','7c67bc31-c7bb-6197-fdca-f73329b976f2') +[34] -108954.7820 ('2096-07-03 23:06:30.632','9c1b37d7-4ced-9428-a0ae-34c5436b14c4') +[] -168124.2364 ('1987-06-03 06:47:12.945','d1c39af4-f920-5095-b8e2-0f878950167b') +[] -112431.4799 ('2021-07-26 07:04:58.527','da07a72d-7e1f-8890-4c4b-326835d11b39') +[-35,-95,58] -181254.9139 ('2086-11-12 17:17:14.473','22f74d0b-dfc0-3f7a-33f4-8055d8fa7846') +[98,119] 11468.5238 ('2092-02-25 11:07:07.695','a1fb97bf-1885-6715-c233-b88a6cd111e4') +[] 82333.8963 ('1989-11-23 01:38:57.012','a2b82b5b-8331-555c-579b-de4b0eeb7e81') +[-5,-66,69] 32055.8376 ('2040-12-17 16:49:08.704','4537d25e-a2db-ea9a-8e24-a16ed7e0c6e4') +[81,-84,-24] -210815.2512 ('2047-06-09 13:30:06.922','ac3c5b5f-f977-2830-c398-d10a6076a498') +[84,-105] -175413.7733 ('1998-11-03 04:30:21.191','c535feac-1943-c0a1-23f0-645d5406db24') +[58,31] -335.8512 ('1973-07-09 12:21:10.444','24a7dd3d-2565-1de3-05d9-e45fd8ba7729') +[-49,-47] 177399.2836 ('2049-03-15 15:33:00.190','e4432b9b-61e9-d451-dc87-ae3b9da6fd35') +[] 211525.2349 ('2106-01-11 
10:44:18.918','23315435-7132-05b5-5a9b-c2c738433a87') +[45,-95,-39] -15314.9732 ('2055-10-29 13:51:12.182','833b2efa-8c72-f5f6-3040-cb4831e8ceb9') +[] 213384.5774 ('2067-02-10 22:02:42.113','0cd7f438-caa7-0d21-867c-1fdb6d67d797') +[99] -147316.5599 ('2000-05-09 21:37:34.776','a3ea6796-38d5-72ff-910d-8b4300831916') +[] 8828.2471 ('1993-11-30 16:53:22.503','7209213f-38bb-cfed-1955-f1fad5a9577a') +[117,9,-35] -134812.6269 ('2065-09-04 23:47:26.589','d33d0d6f-b9c0-2850-4593-cfc9f1e20a4d') +[-35,-58,-101] -9101.5369 ('2023-08-24 20:56:11.695','87fbe3f9-b1f0-c030-a4c0-8662045923b4') +[-58,87] 122510.9099 ('2019-08-09 17:40:29.849','c1d3a2cc-878f-c2c3-4a0b-10e98cda8b4a') +[4,19,58] -13496.8672 ('2027-05-01 09:11:48.659','8996ae31-d670-cbfe-b735-b16b7c3b3476') +[23,-75,-89] -51218.2860 ('2010-06-02 02:49:03.396','d32b8b61-cc3e-31fa-2a2a-abefa60bfcee') +[50] -45297.4315 ('2087-04-15 06:46:08.247','04fe9603-97fc-07a4-6248-0f21e408c884') +[-23,17,63] 89185.9462 ('2065-10-26 08:27:12.817','a5fbf764-70b4-8b65-4a8f-7550abca3859') +[-6] -129925.3690 ('2013-11-05 07:44:45.233','11db26b3-e2b5-b9fa-6b0e-79c43a2e67ab') +[-72,-108] 203171.5475 ('2000-01-28 09:34:58.032','14d5399e-7949-20c7-0e47-85e2fce5836c') +[-73,34,-27] 2676.7265 ('2057-10-25 14:37:10.049','00049a92-4350-badb-3764-dd7f019b9b31') +[65,-7] -153472.9461 ('1973-04-12 02:34:41.245','e0a0324d-1552-d11e-f3a5-fbd822d206c5') +[] 81837.7838 ('2041-09-20 20:56:39.712','f7923f2c-e526-1706-79b9-58045d9deaa7') +[-113,8] 173192.6905 ('2066-04-02 09:59:59.356','e3013e5c-92e3-c03c-b57a-e1939e00a1a7') +[107] 9694.1102 ('1984-11-02 13:11:34.034','e973db18-07b7-2117-f3ba-e7002adfa939') +[] -76460.9664 ('2051-02-10 09:54:42.143','b8344c22-9e8a-7052-c644-9c3e5989cdf1') +[59,59,0] 27041.7606 ('2083-02-17 18:21:22.547','4d6b137b-a3e1-f36d-2c0c-c8d718dda388') +[-114] 133673.9630 ('2005-10-02 20:34:27.452','04785b75-30e5-af8b-547e-d15bcb7f49fb') +[43] -169861.2000 ('2006-12-13 09:26:13.923','cb865d38-d961-d7f9-acbb-583b9f31252f') +[] 
197115.2174 ('2060-04-08 04:17:00.488','0f26c4b4-b24c-1fd5-c619-31bcf71a4831') +[-25] -200081.9506 ('2055-12-25 02:30:16.276','0b32ad69-2c84-4269-9718-e3171482878a') +[14,110] -40196.4463 ('2084-08-13 19:37:07.588','ed882071-acba-b3ab-5d77-d79a9544a834') +[-62,-71,-82] -154958.9747 ('2100-07-08 02:32:53.741','7711c7c1-0d22-e302-fc86-61ef5e68db96') +[96,-114,-101] 78910.3320 ('2100-07-19 15:02:27.109','756bfd26-c4b3-94b8-e991-c7ab7a833b76') +[49] 80117.2267 ('1970-07-04 03:50:56.748','aebac019-9054-4a77-2ccd-8801fc4a7496') +[] 102078.4801 ('2055-01-07 01:22:33.624','21f2e59a-a1ca-5df3-27fd-aa95456cfbe5') +[-106] -108728.4237 ('2020-05-27 11:56:18.121','6b7b6674-9342-2360-4cc0-f7ef8a2404de') +[] 173213.5631 ('2034-01-18 19:04:16.059','2dc0038d-67c1-f0ee-280b-f3f0f536b01a') +[42] 139872.2503 ('2001-07-16 11:09:28.754','d6487da6-1077-1053-f314-9a1079f5df15') +[] 1107.5244 ('2031-02-26 15:06:00.846','b32bee8f-85b7-3c71-bb24-9a0093e6a08c') +[] 85892.8913 ('2088-04-13 14:54:18.514','84f3b59b-8d23-78a6-3032-91392344584f') +[43] -109644.2714 ('1974-07-04 14:45:43.139','cf722ca8-15f5-6fe2-997c-0cf88e95e902') +[] 212557.3762 ('2069-03-03 07:21:08.439','9e676cac-36e6-2962-f7b1-578214f0dfbd') +[-128,55] 80471.0777 ('1970-04-01 18:54:40.257','ca358854-416b-9c95-0b9b-c7fed7bb7cb5') +[-30,-54] -132205.4512 ('2017-12-15 22:54:15.750','3558faa4-2d2f-c533-437f-1e03d3600f1d') +[-116,-72] -91499.6670 ('2105-09-23 21:06:17.755','07bb6e47-3234-c268-40d7-332388dc06f8') +[] -201636.5228 ('2085-01-27 07:54:42.717','86c3bdc3-ff0f-1723-07c2-845aa3c02370') +[-103,-39] 44330.7722 ('2064-07-02 11:08:28.068','0869c79d-6bdd-5d2d-a3d1-ffe13f6aa810') +[99] -31035.5391 ('2093-07-26 01:50:23.026','aeb59338-254f-dc09-fbd7-263da415e211') +[101] 157961.4729 ('2036-05-04 02:35:07.845','8b6221a9-8dad-4655-7460-6b3031b06893') +[111] 84732.4403 ('1997-04-06 16:10:18.624','08806a79-59f4-c833-eedc-a200bb851767') +[9,-48] -190491.5590 ('2031-11-03 19:47:03.757','914e6166-c96e-e0e4-101a-0bb516cf5a2f') +[-41] 
-132501.8311 ('2089-11-21 21:38:28.848','6de6cc8d-3c49-641e-fb12-87ed5ecb97b0') +[77] 64903.6579 ('1985-04-17 17:08:03.998','26484b8a-f3f1-587f-7777-bc7a57a689c3') - diff --git a/dbms/tests/queries/0_stateless/01087_storage_generate.sql b/dbms/tests/queries/0_stateless/01087_storage_generate.sql index 54ecd3007a9..bc69e8abbac 100644 --- a/dbms/tests/queries/0_stateless/01087_storage_generate.sql +++ b/dbms/tests/queries/0_stateless/01087_storage_generate.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(3, 5, 10); +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(10, 5, 3); SELECT * FROM test_table_2 LIMIT 100; diff --git a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference index 291e5b1689a..96c73c97411 100644 --- a/dbms/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/dbms/tests/queries/0_stateless/01087_table_function_generate.reference @@ -214,25 +214,25 @@ U6 \'%Y~t9 RL,{Xs\\tw - +[] -27467.1221 ('2021-03-08 03:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') +[] 204013.7193 ('2026-05-05 05:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') [-122] -9432.2617 ('2001-08-23 08:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') +[-30,61] -133488.2399 ('2048-05-14 09:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') +[-1] 58720.0591 ('1976-06-07 23:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') +[1] -18736.7874 ('1977-03-10 04:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') +[34,-10] -99367.9009 ('2031-05-08 10:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') [110] 31562.7502 ('2045-02-27 11:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') [114] -84125.1554 ('2023-06-06 
06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') -[1] -18736.7874 ('1977-03-10 04:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') -[] 204013.7193 ('2026-05-05 05:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') [124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') -[34,-10] -99367.9009 ('2031-05-08 10:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') -[] -27467.1221 ('2021-03-08 03:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') -[-1] 58720.0591 ('1976-06-07 23:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') -[-30,61] -133488.2399 ('2048-05-14 09:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') - [] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743 -[55] 3047524030 li&lF 93462.3661 h 2.8979254388809897e54 ('1976-01-10','1987-07-14 00:25:51','2021-11-19 04:44:08.986','486e5b26-5fe8-fe3e-12ef-09aee40643e0') 9E75 -[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2106-02-07','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43 -[100,-42] 3999367674 -112975.9852 h 2.658098863752086e-160 ('2081-05-13','2071-08-07 13:34:33','1980-11-11 12:00:44.669','9754e8ac-5145-befb-63d9-a12dd1cf1f3a') DF63 -[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2106-02-07','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D -[48,-120] 3848918261 1 Date: Sun, 8 Mar 2020 12:32:22 +0300 Subject: [PATCH 203/215] Fix stateless tests with msan (#9531) * try run tests * try fix build * try enable other libraries * suppress some msan warnings * Update msan_suppressions.txt * Update msan_suppressions.txt * use function names in suppressions list * update submodule --- cmake/sanitize.cmake | 21 --------------------- contrib/openssl | 2 +- dbms/tests/msan_suppressions.txt | 9 +++++++++ 3 files changed, 10 insertions(+), 22 deletions(-) diff 
--git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 13947425f7b..3d192f1fe76 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -35,27 +35,6 @@ if (SANITIZE) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan") endif () - # Temporarily disable many external libraries that don't work under - # MemorySanitizer yet. - set (ENABLE_HDFS 0 CACHE BOOL "") - set (ENABLE_CAPNP 0 CACHE BOOL "") - set (ENABLE_RDKAFKA 0 CACHE BOOL "") - set (ENABLE_POCO_MONGODB 0 CACHE BOOL "") - set (ENABLE_POCO_NETSSL 0 CACHE BOOL "") - set (ENABLE_POCO_ODBC 0 CACHE BOOL "") - set (ENABLE_ODBC 0 CACHE BOOL "") - set (ENABLE_MYSQL 0 CACHE BOOL "") - set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "") - set (USE_INTERNAL_CAPNP_LIBRARY 0 CACHE BOOL "") - set (USE_SIMDJSON 0 CACHE BOOL "") - set (ENABLE_ORC 0 CACHE BOOL "") - set (ENABLE_PARQUET 0 CACHE BOOL "") - set (USE_CAPNP 0 CACHE BOOL "") - set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "") - set (USE_ORC 0 CACHE BOOL "") - set (USE_AVRO 0 CACHE BOOL "") - set (ENABLE_SSL 0 CACHE BOOL "") - elseif (SANITIZE STREQUAL "thread") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=thread") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=thread") diff --git a/contrib/openssl b/contrib/openssl index debbae80cb4..07e96230645 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit debbae80cb44de55fd8040fdfbe4b506601ff2a6 +Subproject commit 07e9623064508d15dd61367f960ebe7fc9aecd77 diff --git a/dbms/tests/msan_suppressions.txt b/dbms/tests/msan_suppressions.txt index b3db90b9123..0ceef2d834c 100644 --- a/dbms/tests/msan_suppressions.txt +++ b/dbms/tests/msan_suppressions.txt @@ -4,3 +4,12 @@ fun:__gxx_personality_* # We apply std::tolower to uninitialized padding, but don't use the result, so # it is OK. 
Reproduce with "select ngramDistanceCaseInsensitive(materialize(''), '')" fun:tolower + +# May be it's not OK, but suppress it to run other tests +# Some functions in OpenSSL: +fun:probable_prime +fun:BN_bin2bn +fun:BN_add_word +fun:bn_div_fixed_top +fun:bn_mul_words +fun:BN_cmp From 3ed270dcb78d3b876dddd1ad11822666a694f6c7 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Sun, 8 Mar 2020 14:07:05 +0300 Subject: [PATCH 204/215] Rewrite CROSS/COMMA to INNER JOIN using table's columns knowledge (#9512) use column names in CrossToInnerJoinVisitor --- .../Interpreters/CrossToInnerJoinVisitor.cpp | 160 ++-- .../Interpreters/CrossToInnerJoinVisitor.h | 5 +- .../Interpreters/DatabaseAndTableWithAlias.h | 22 + .../ExtractExpressionInfoVisitor.cpp | 5 +- dbms/src/Interpreters/IdentifierSemantic.cpp | 61 +- dbms/src/Interpreters/IdentifierSemantic.h | 14 +- .../Interpreters/InterpreterSelectQuery.cpp | 84 +- .../JoinToSubqueryTransformVisitor.cpp | 10 +- dbms/src/Interpreters/JoinedTables.cpp | 53 +- dbms/src/Interpreters/JoinedTables.h | 22 +- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 7 +- dbms/src/Interpreters/QueryAliasesVisitor.h | 8 +- .../TranslateQualifiedNamesVisitor.cpp | 4 +- .../01095_tpch_like_smoke.reference | 14 + .../0_stateless/01095_tpch_like_smoke.sql | 802 ++++++++++++++++++ 15 files changed, 1089 insertions(+), 182 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01095_tpch_like_smoke.reference create mode 100644 dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index 740fe35e936..21cd688cd61 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -27,41 +29,26 @@ namespace ErrorCodes namespace { -struct JoinedTable +struct JoinedElement { - 
DatabaseAndTableWithAlias table; - ASTTablesInSelectQueryElement * element = nullptr; - ASTTableJoin * join = nullptr; - ASTPtr array_join = nullptr; - bool has_using = false; - - JoinedTable(ASTPtr table_element) + JoinedElement(const ASTTablesInSelectQueryElement & table_element) + : element(table_element) { - element = table_element->as(); - if (!element) - throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR); + if (element.table_join) + join = element.table_join->as(); + } - if (element->table_join) - { - join = element->table_join->as(); - if (join->kind == ASTTableJoin::Kind::Cross || - join->kind == ASTTableJoin::Kind::Comma) - { - if (!join->children.empty()) - throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR); - } + void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const + { + if (!element.table_expression) + throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR); - if (join->using_expression_list) - has_using = true; - } + ASTTableExpression * table_expression = element.table_expression->as(); + if (!table_expression) + throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR); - if (element->table_expression) - { - const auto & expr = element->table_expression->as(); - table = DatabaseAndTableWithAlias(expr); - } - - array_join = element->array_join; + if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database))) + throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR); } void rewriteCommaToCross() @@ -70,7 +57,24 @@ struct JoinedTable join->kind = ASTTableJoin::Kind::Cross; } + void rewriteCrossToInner(ASTPtr on_expression) + { + join->kind = ASTTableJoin::Kind::Inner; + join->strictness = ASTTableJoin::Strictness::All; + + join->on_expression = on_expression; + join->children.push_back(join->on_expression); + } + + ASTPtr arrayJoin() const 
{ return element.array_join; } + const ASTTableJoin * tableJoin() const { return join; } + bool canAttachOnExpression() const { return join && !join->on_expression; } + bool hasUsing() const { return join && join->using_expression_list; } + +private: + const ASTTablesInSelectQueryElement & element; + ASTTableJoin * join = nullptr; }; bool isComparison(const String & name) @@ -89,13 +93,14 @@ class CheckExpressionVisitorData public: using TypeToVisit = const ASTFunction; - CheckExpressionVisitorData(const std::vector & tables_) + CheckExpressionVisitorData(const std::vector & tables_, + const std::vector & tables_with_columns, + Aliases && aliases_) : joined_tables(tables_) + , tables(tables_with_columns) + , aliases(aliases_) , ands_only(true) - { - for (auto & joined : joined_tables) - tables.push_back(joined.table); - } + {} void visit(const ASTFunction & node, const ASTPtr & ast) { @@ -160,9 +165,10 @@ public: } private: - const std::vector & joined_tables; - std::vector tables; + const std::vector & joined_tables; + const std::vector & tables; std::map> asts_to_join_on; + Aliases aliases; bool ands_only; size_t canMoveEqualsToJoinOn(const ASTFunction & node) @@ -177,6 +183,12 @@ private: if (!left || !right) return false; + /// Moving expressions that use column aliases is not supported. + if (left->isShort() && aliases.count(left->shortName())) + return false; + if (right->isShort() && aliases.count(right->shortName())) + return false; + return checkIdentifiers(*left, *right); } @@ -185,15 +197,17 @@ private: /// @return table position to attach expression to or 0. 
size_t checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right) { - size_t left_table_pos = 0; - bool left_match = IdentifierSemantic::chooseTable(left, tables, left_table_pos); + std::optional left_table_pos = IdentifierSemantic::getMembership(left); + if (!left_table_pos) + left_table_pos = IdentifierSemantic::chooseTable(left, tables); - size_t right_table_pos = 0; - bool right_match = IdentifierSemantic::chooseTable(right, tables, right_table_pos); + std::optional right_table_pos = IdentifierSemantic::getMembership(right); + if (!right_table_pos) + right_table_pos = IdentifierSemantic::chooseTable(right, tables); - if (left_match && right_match && (left_table_pos != right_table_pos)) + if (left_table_pos && right_table_pos && (*left_table_pos != *right_table_pos)) { - size_t table_pos = std::max(left_table_pos, right_table_pos); + size_t table_pos = std::max(*left_table_pos, *right_table_pos); if (joined_tables[table_pos].canAttachOnExpression()) return table_pos; } @@ -205,7 +219,7 @@ using CheckExpressionMatcher = ConstOneTypeMatcher; -bool getTables(ASTSelectQuery & select, std::vector & joined_tables, size_t & num_comma) +bool getTables(ASTSelectQuery & select, std::vector & joined_tables, size_t & num_comma) { if (!select.tables()) return false; @@ -224,23 +238,37 @@ bool getTables(ASTSelectQuery & select, std::vector & joined_tables for (auto & child : tables->children) { - joined_tables.emplace_back(JoinedTable(child)); - JoinedTable & t = joined_tables.back(); - if (t.array_join) + auto table_element = child->as(); + if (!table_element) + throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR); + + joined_tables.emplace_back(JoinedElement(*table_element)); + JoinedElement & t = joined_tables.back(); + + if (t.arrayJoin()) { ++num_array_join; continue; } - if (t.has_using) + if (t.hasUsing()) { ++num_using; continue; } - if (auto * join = t.join) + if (auto * join = t.tableJoin()) + { + if 
(join->kind == ASTTableJoin::Kind::Cross || + join->kind == ASTTableJoin::Kind::Comma) + { + if (!join->children.empty()) + throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR); + } + if (join->kind == ASTTableJoin::Kind::Comma) ++num_comma; + } } if (num_using && (num_tables - num_array_join) > 2) @@ -251,12 +279,20 @@ bool getTables(ASTSelectQuery & select, std::vector & joined_tables if (num_array_join || num_using) return false; + return true; } } +bool CrossToInnerJoinMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + if (node->as()) + return false; + return true; +} + void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = ast->as()) @@ -266,10 +302,19 @@ void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data) void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data) { size_t num_comma = 0; - std::vector joined_tables; + std::vector joined_tables; if (!getTables(select, joined_tables, num_comma)) return; + /// Check if joined_tables are consistent with known tables_with_columns + { + if (joined_tables.size() != data.tables_with_columns.size()) + throw Exception("Logical error: inconsistent number of tables", ErrorCodes::LOGICAL_ERROR); + + for (size_t i = 0; i < joined_tables.size(); ++i) + joined_tables[i].checkTableName(data.tables_with_columns[i].table, data.current_database); + } + /// COMMA to CROSS if (num_comma) @@ -283,7 +328,13 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da if (!select.where()) return; - CheckExpressionVisitor::Data visitor_data{joined_tables}; + Aliases aliases; + QueryAliasesVisitor::Data query_aliases_data{aliases}; + if (ASTPtr with = select.with()) + QueryAliasesVisitor(query_aliases_data).visit(with); + QueryAliasesVisitor(query_aliases_data).visit(select.select()); + + CheckExpressionVisitor::Data visitor_data{joined_tables, data.tables_with_columns, std::move(aliases)}; 
CheckExpressionVisitor(visitor_data).visit(select.where()); if (visitor_data.complex()) @@ -293,12 +344,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da { if (visitor_data.matchAny(i)) { - ASTTableJoin & join = *joined_tables[i].join; - join.kind = ASTTableJoin::Kind::Inner; - join.strictness = ASTTableJoin::Strictness::All; - - join.on_expression = visitor_data.makeOnExpression(i); - join.children.push_back(join.on_expression); + joined_tables[i].rewriteCrossToInner(visitor_data.makeOnExpression(i)); data.done = true; } } diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.h b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h index 522d368e3fa..4c5ae97bc34 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.h +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h @@ -6,6 +6,7 @@ namespace DB { class ASTSelectQuery; +struct TableWithColumnNamesAndTypes; /// AST transformer. It replaces cross joins with equivalented inner join if possible. class CrossToInnerJoinMatcher @@ -13,10 +14,12 @@ class CrossToInnerJoinMatcher public: struct Data { + const std::vector & tables_with_columns; + const String current_database; bool done = false; }; - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + static bool needChildVisit(ASTPtr &, const ASTPtr &); static void visit(ASTPtr & ast, Data & data); private: diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 2510d6e0679..5b98669d83e 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -35,6 +35,12 @@ struct DatabaseAndTableWithAlias /// Check if it satisfies another db_table name. @note opterion is not symmetric. 
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias); + + /// Exactly the same table name + bool same(const DatabaseAndTableWithAlias & db_table) const + { + return database == db_table.database && table == db_table.table && alias == db_table.alias; + } }; struct TableWithColumnNames @@ -80,6 +86,19 @@ struct TableWithColumnNamesAndTypes , columns(columns_) {} + bool hasColumn(const String & name) const + { + if (names.empty()) + { + for (auto & col : columns) + names.insert(col.name); + for (auto & col : hidden_columns) + names.insert(col.name); + } + + return names.count(name); + } + void addHiddenColumns(const NamesAndTypesList & addition) { hidden_columns.insert(hidden_columns.end(), addition.begin(), addition.end()); @@ -99,6 +118,9 @@ struct TableWithColumnNamesAndTypes return TableWithColumnNames(table, std::move(out_columns), std::move(out_hidden_columns)); } + +private: + mutable NameSet names; }; std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); diff --git a/dbms/src/Interpreters/ExtractExpressionInfoVisitor.cpp b/dbms/src/Interpreters/ExtractExpressionInfoVisitor.cpp index 1240b6a09d6..f0ca33b6b8b 100644 --- a/dbms/src/Interpreters/ExtractExpressionInfoVisitor.cpp +++ b/dbms/src/Interpreters/ExtractExpressionInfoVisitor.cpp @@ -50,9 +50,8 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr } else { - size_t best_table_pos = 0; - if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos)) - data.unique_reference_tables_pos.emplace(best_table_pos); + if (auto best_table_pos = IdentifierSemantic::chooseTable(identifier, data.tables)) + data.unique_reference_tables_pos.emplace(*best_table_pos); } } diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index ed882c1118d..1cc67107d05 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ 
b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -14,29 +14,18 @@ namespace ErrorCodes namespace { -const DatabaseAndTableWithAlias & extractTable(const DatabaseAndTableWithAlias & table) -{ - return table; -} - -const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & table) -{ - return table.table; -} - template -IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos, bool allow_ambiguous) +std::optional tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, bool allow_ambiguous) { using ColumnMatch = IdentifierSemantic::ColumnMatch; - best_table_pos = 0; + size_t best_table_pos = 0; auto best_match = ColumnMatch::NoMatch; size_t same_match = 0; for (size_t i = 0; i < tables.size(); ++i) { - auto match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i])); + auto match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i]); if (match != ColumnMatch::NoMatch) { if (match > best_match) @@ -54,9 +43,13 @@ IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, { if (!allow_ambiguous) throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); - return ColumnMatch::Ambiguous; + best_match = ColumnMatch::Ambiguous; + return {}; } - return best_match; + + if (best_match != ColumnMatch::NoMatch) + return best_table_pos; + return {}; } } @@ -125,18 +118,22 @@ std::optional IdentifierSemantic::getMembership(const ASTIdentifier & id return identifier.semantic->membership; } -bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos, bool ambiguous) +std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, + bool ambiguous) { - static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch; - return tryChooseTable(identifier, 
tables, best_table_pos, ambiguous) != no_match; + return tryChooseTable(identifier, tables, ambiguous); } -bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos, bool ambiguous) +std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, + bool ambiguous) { - static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch; - return tryChooseTable(identifier, tables, best_table_pos, ambiguous) != no_match; + return tryChooseTable(identifier, tables, ambiguous); +} + +std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, + bool ambiguous) +{ + return tryChooseTable(identifier, tables, ambiguous); } std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) @@ -198,6 +195,22 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const return ColumnMatch::NoMatch; } +IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, + const TableWithColumnNames & db_and_table) +{ + /// TODO: ColumnName match logic is disabled cause caller's code is not ready for it + return canReferColumnToTable(identifier, db_and_table.table); +} + +IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, + const TableWithColumnNamesAndTypes & db_and_table) +{ + ColumnMatch match = canReferColumnToTable(identifier, db_and_table.table); + if (match == ColumnMatch::NoMatch && identifier.isShort() && db_and_table.hasColumn(identifier.shortName())) + match = ColumnMatch::ColumnName; + return match; +} + /// Strip qualificators from left side of column name. /// Example: 'database.table.name' -> 'name'. 
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index e3b69abc61e..0b92b7b7716 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -22,6 +22,7 @@ struct IdentifierSemantic enum class ColumnMatch { NoMatch, + ColumnName, /// column qualified with column names list AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName) TableName, /// column qualified with table name DbAndTable, /// column qualified with database and table name @@ -40,6 +41,9 @@ struct IdentifierSemantic static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNames & db_and_table); + static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table); + static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static bool canBeAlias(const ASTIdentifier & identifier); @@ -47,10 +51,12 @@ struct IdentifierSemantic static void coverName(ASTIdentifier &, const String & alias); static std::optional uncover(const ASTIdentifier & identifier); static std::optional getMembership(const ASTIdentifier & identifier); - static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, - bool ambiguous = false); - static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, - bool 
ambiguous = false); + static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, + bool allow_ambiguous = false); + static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, + bool allow_ambiguous = false); + static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, + bool allow_ambiguous = false); private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 1c5fba86c98..8285978c7bd 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -235,23 +235,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(), ErrorCodes::TOO_DEEP_SUBQUERIES); - JoinedTables joined_tables(getSelectQuery()); - if (joined_tables.hasJoins()) - { - CrossToInnerJoinVisitor::Data cross_to_inner; - CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr); - - JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context}; - JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr); - - joined_tables.reset(getSelectQuery()); - } - - max_streams = settings.max_threads; - ASTSelectQuery & query = getSelectQuery(); - - const ASTPtr & left_table_expression = joined_tables.leftTableExpression(); - + bool has_input = input || input_pipe; if (input) { /// Read from prepared input. @@ -262,35 +246,51 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Read from prepared input. source_header = input_pipe->getHeader(); } - else if (joined_tables.isLeftTableSubquery()) - { - /// Read from subquery. 
- interpreter_subquery = std::make_unique( - left_table_expression, getSubqueryContext(*context), options.subquery()); - source_header = interpreter_subquery->getSampleBlock(); - } - else if (!storage) - { - if (joined_tables.isLeftTableFunction()) - { - /// Read from table function. propagate all settings from initSettings(), - /// alternative is to call on current `context`, but that can potentially pollute it. - storage = getSubqueryContext(*context).executeTableFunction(left_table_expression); - } - else - storage = joined_tables.getLeftTableStorage(*context); - } + JoinedTables joined_tables(getSubqueryContext(*context), getSelectQuery()); + + if (!has_input && !storage) + storage = joined_tables.getLeftTableStorage(); if (storage) { table_lock = storage->lockStructureForShare(false, context->getInitialQueryId()); table_id = storage->getStorageID(); - - joined_tables.resolveTables(getSubqueryContext(*context), storage); } - else - joined_tables.resolveTables(getSubqueryContext(*context), source_header.getNamesAndTypesList()); + + if (has_input || !joined_tables.resolveTables()) + joined_tables.makeFakeTable(storage, source_header); + + /// Rewrite JOINs + if (!has_input && joined_tables.tablesCount() > 1) + { + CrossToInnerJoinVisitor::Data cross_to_inner{joined_tables.tablesWithColumns(), context->getCurrentDatabase()}; + CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr); + + JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context}; + JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr); + + joined_tables.reset(getSelectQuery()); + joined_tables.resolveTables(); + + if (storage && joined_tables.isLeftTableSubquery()) + { + /// Rewritten with subquery. Free storage here locks here. 
+ storage = {}; + table_lock.release(); + table_id = StorageID::createEmpty(); + } + } + + if (!has_input) + { + interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery()); + if (interpreter_subquery) + source_header = interpreter_subquery->getSampleBlock(); + } + + max_streams = settings.max_threads; + ASTSelectQuery & query = getSelectQuery(); auto analyze = [&] (bool try_move_to_prewhere = true) { @@ -330,11 +330,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (syntax_analyzer_result->rewrite_subqueries) { /// remake interpreter_subquery when PredicateOptimizer rewrites subqueries and main table is subquery - if (joined_tables.isLeftTableSubquery()) - interpreter_subquery = std::make_unique( - left_table_expression, - getSubqueryContext(*context), - options.subquery()); + interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery()); } } diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 6d543643436..83066996cff 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -147,9 +147,8 @@ struct ColumnAliasesMatcher { bool last_table = false; { - size_t best_table_pos = 0; - if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos)) - last_table = (best_table_pos + 1 == tables.size()); + if (auto best_table_pos = IdentifierSemantic::chooseTable(*identifier, tables)) + last_table = (*best_table_pos + 1 == tables.size()); } if (!last_table) @@ -207,10 +206,9 @@ struct ColumnAliasesMatcher bool last_table = false; String long_name; - size_t table_pos = 0; - if (IdentifierSemantic::chooseTable(node, data.tables, table_pos)) + if (auto table_pos = IdentifierSemantic::chooseTable(node, data.tables)) { - auto & table = data.tables[table_pos]; + auto & table = data.tables[*table_pos]; IdentifierSemantic::setColumnLongName(node, table); /// 
table_name.column_name -> table_alias.column_name long_name = node.name; if (&table == &data.tables.back()) diff --git a/dbms/src/Interpreters/JoinedTables.cpp b/dbms/src/Interpreters/JoinedTables.cpp index d97e78a1669..5b085b70863 100644 --- a/dbms/src/Interpreters/JoinedTables.cpp +++ b/dbms/src/Interpreters/JoinedTables.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -33,8 +34,9 @@ void checkTablesWithColumns(const std::vector & tables_with_columns, const Co } -JoinedTables::JoinedTables(const ASTSelectQuery & select_query) - : table_expressions(getTableExpressions(select_query)) +JoinedTables::JoinedTables(Context && context_, const ASTSelectQuery & select_query) + : context(context_) + , table_expressions(getTableExpressions(select_query)) , left_table_expression(extractTableExpression(select_query, 0)) , left_db_and_table(getDatabaseAndTable(select_query, 0)) {} @@ -49,9 +51,20 @@ bool JoinedTables::isLeftTableFunction() const return left_table_expression && left_table_expression->as(); } -StoragePtr JoinedTables::getLeftTableStorage(Context & context) +std::unique_ptr JoinedTables::makeLeftTableSubquery(const SelectQueryOptions & select_options) { - StoragePtr storage; + if (!isLeftTableSubquery()) + return {}; + return std::make_unique(left_table_expression, context, select_options); +} + +StoragePtr JoinedTables::getLeftTableStorage() +{ + if (isLeftTableSubquery()) + return {}; + + if (isLeftTableFunction()) + return context.executeTableFunction(left_table_expression); if (left_db_and_table) { @@ -75,42 +88,36 @@ StoragePtr JoinedTables::getLeftTableStorage(Context & context) if (tmp_table_id.database_name == database_name && tmp_table_id.table_name == table_name) { /// Read from view source. - storage = context.getViewSource(); + return context.getViewSource(); } } - if (!storage) - { - /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). 
- storage = context.getTable(database_name, table_name); - } - - return storage; + /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). + return context.getTable(database_name, table_name); } -void JoinedTables::resolveTables(const Context & context, StoragePtr storage) +bool JoinedTables::resolveTables() { tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); checkTablesWithColumns(tables_with_columns, context); - if (tables_with_columns.empty()) + return !tables_with_columns.empty(); +} + +void JoinedTables::makeFakeTable(StoragePtr storage, const Block & source_header) +{ + if (storage) { const ColumnsDescription & storage_columns = storage->getColumns(); tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary()); + auto & table = tables_with_columns.back(); table.addHiddenColumns(storage_columns.getMaterialized()); table.addHiddenColumns(storage_columns.getAliases()); table.addHiddenColumns(storage_columns.getVirtuals()); } -} - -void JoinedTables::resolveTables(const Context & context, const NamesAndTypesList & source_columns) -{ - tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); - checkTablesWithColumns(tables_with_columns, context); - - if (tables_with_columns.empty()) - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_columns); + else + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList()); } } diff --git a/dbms/src/Interpreters/JoinedTables.h b/dbms/src/Interpreters/JoinedTables.h index 225d024581a..c9d24562a26 100644 --- a/dbms/src/Interpreters/JoinedTables.h +++ b/dbms/src/Interpreters/JoinedTables.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -9,6 +10,7 @@ namespace DB class ASTSelectQuery; class Context; +struct SelectQueryOptions; /// Joined tables' columns resolver. 
/// We want to get each table structure at most once per table occurance. Or even better once per table. @@ -16,32 +18,30 @@ class Context; class JoinedTables { public: - JoinedTables() = default; - JoinedTables(const ASTSelectQuery & select_query); + JoinedTables(Context && contex, const ASTSelectQuery & select_query); void reset(const ASTSelectQuery & select_query) { - *this = JoinedTables(select_query); + *this = JoinedTables(std::move(context), select_query); } - StoragePtr getLeftTableStorage(Context & context); - - /// Resolve columns or get from storage. It assumes storage is not nullptr. - void resolveTables(const Context & context, StoragePtr storage); - /// Resolve columns or get from source list. - void resolveTables(const Context & context, const NamesAndTypesList & source_columns); + StoragePtr getLeftTableStorage(); + bool resolveTables(); + void makeFakeTable(StoragePtr storage, const Block & source_header); const std::vector & tablesWithColumns() const { return tables_with_columns; } bool isLeftTableSubquery() const; bool isLeftTableFunction() const; - bool hasJoins() const { return table_expressions.size() > 1; } + size_t tablesCount() const { return table_expressions.size(); } - const ASTPtr & leftTableExpression() const { return left_table_expression; } const String & leftTableDatabase() const { return database_name; } const String & leftTableName() const { return table_name; } + std::unique_ptr makeLeftTableSubquery(const SelectQueryOptions & select_options); + private: + Context context; std::vector table_expressions; std::vector tables_with_columns; diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 6de0ece8b59..6d8f0266670 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -30,7 +30,7 @@ static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, con } -bool QueryAliasesMatcher::needChildVisit(ASTPtr & 
node, const ASTPtr &) +bool QueryAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) { /// Don't descent into table functions and subqueries and special case for ArrayJoin. if (node->as() || node->as() || node->as()) @@ -38,7 +38,7 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) return true; } -void QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +void QueryAliasesMatcher::visit(const ASTPtr & ast, Data & data) { if (auto * s = ast->as()) visit(*s, ast, data); @@ -81,8 +81,9 @@ void QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & /// set unique aliases for all subqueries. this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -void QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) +void QueryAliasesMatcher::visit(const ASTSubquery & const_subquery, const ASTPtr & ast, Data & data) { + ASTSubquery & subquery = const_cast(const_subquery); Aliases & aliases = data.aliases; static std::atomic_uint64_t subquery_index = 0; diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index c4e297965c3..62242b500f7 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -15,19 +15,19 @@ struct ASTArrayJoin; class QueryAliasesMatcher { public: - using Visitor = InDepthNodeVisitor; + using Visitor = ConstInDepthNodeVisitor; struct Data { Aliases & aliases; }; - static void visit(ASTPtr & ast, Data & data); - static bool needChildVisit(ASTPtr & node, const ASTPtr & child); + static void visit(const ASTPtr & ast, Data & data); + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); private: static void visit(const ASTSelectQuery & select, 
const ASTPtr & ast, Data & data); - static void visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static void visit(const ASTSubquery & subquery, const ASTPtr & ast, Data & data); static void visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); static void visitOther(const ASTPtr & ast, Data & data); }; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 04f59b81f0f..27542b97691 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -93,10 +93,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (IdentifierSemantic::getColumnName(identifier)) { String short_name = identifier.shortName(); - size_t table_pos = 0; bool allow_ambiguous = data.join_using_columns.count(short_name); - if (IdentifierSemantic::chooseTable(identifier, data.tables, table_pos, allow_ambiguous)) + if (auto best_pos = IdentifierSemantic::chooseTable(identifier, data.tables, allow_ambiguous)) { + size_t table_pos = *best_pos; if (data.unknownColumn(table_pos, identifier)) { String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false); diff --git a/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.reference b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.reference new file mode 100644 index 00000000000..20caccfed83 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.reference @@ -0,0 +1,14 @@ +1 +3 +5 +6 +0.0000 +9 +10 +12 +14 +0.00000000 +16 +18 +19 +0.0000 diff --git a/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql new file mode 100644 index 00000000000..ae43a9cfc28 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01095_tpch_like_smoke.sql @@ -0,0 +1,802 @@ +CREATE DATABASE IF NOT EXISTS tpch; +USE tpch; + +DROP TABLE IF EXISTS part; +DROP TABLE IF EXISTS 
supplier; +DROP TABLE IF EXISTS partsupp; +DROP TABLE IF EXISTS customer; +DROP TABLE IF EXISTS orders; +DROP TABLE IF EXISTS lineitem; +DROP TABLE IF EXISTS nation; +DROP TABLE IF EXISTS region; + +CREATE TABLE part +( + p_partkey Int32, -- PK + p_name String, -- variable text, size 55 + p_mfgr FixedString(25), + p_brand FixedString(10), + p_type String, -- variable text, size 25 + p_size Int32, -- integer + p_container FixedString(10), + p_retailprice Decimal(18,2), + p_comment String, -- variable text, size 23 + CONSTRAINT pk CHECK p_partkey >= 0, + CONSTRAINT positive CHECK (p_size >= 0 AND p_retailprice >= 0) +) engine = MergeTree ORDER BY (p_partkey); + +CREATE TABLE supplier +( + s_suppkey Int32, -- PK + s_name FixedString(25), + s_address String, -- variable text, size 40 + s_nationkey Int32, -- FK n_nationkey + s_phone FixedString(15), + s_acctbal Decimal(18,2), + s_comment String, -- variable text, size 101 + CONSTRAINT pk CHECK s_suppkey >= 0 +) engine = MergeTree ORDER BY (s_suppkey); + +CREATE TABLE partsupp +( + ps_partkey Int32, -- PK(1), FK p_partkey + ps_suppkey Int32, -- PK(2), FK s_suppkey + ps_availqty Int32, -- integer + ps_supplycost Decimal(18,2), + ps_comment String, -- variable text, size 199 + CONSTRAINT pk CHECK ps_partkey >= 0, + CONSTRAINT c1 CHECK (ps_availqty >= 0 AND ps_supplycost >= 0) +) engine = MergeTree ORDER BY (ps_partkey, ps_suppkey); + +CREATE TABLE customer +( + c_custkey Int32, -- PK + c_name String, -- variable text, size 25 + c_address String, -- variable text, size 40 + c_nationkey Int32, -- FK n_nationkey + c_phone FixedString(15), + c_acctbal Decimal(18,2), + c_mktsegment FixedString(10), + c_comment String, -- variable text, size 117 + CONSTRAINT pk CHECK c_custkey >= 0 +) engine = MergeTree ORDER BY (c_custkey); + +CREATE TABLE orders +( + o_orderkey Int32, -- PK + o_custkey Int32, -- FK c_custkey + o_orderstatus FixedString(1), + o_totalprice Decimal(18,2), + o_orderdate Date, + o_orderpriority FixedString(15), + 
o_clerk FixedString(15), + o_shippriority Int32, -- integer + o_comment String, -- variable text, size 79 + CONSTRAINT c1 CHECK o_totalprice >= 0 +) engine = MergeTree ORDER BY (o_orderdate, o_orderkey); + +CREATE TABLE lineitem +( + l_orderkey Int32, -- PK(1), FK o_orderkey + l_partkey Int32, -- FK ps_partkey + l_suppkey Int32, -- FK ps_suppkey + l_linenumber Int32, -- PK(2) + l_quantity Decimal(18,2), + l_extendedprice Decimal(18,2), + l_discount Decimal(18,2), + l_tax Decimal(18,2), + l_returnflag FixedString(1), + l_linestatus FixedString(1), + l_shipdate Date, + l_commitdate Date, + l_receiptdate Date, + l_shipinstruct FixedString(25), + l_shipmode FixedString(10), + l_comment String, -- variable text size 44 + CONSTRAINT c1 CHECK (l_quantity >= 0 AND l_extendedprice >= 0 AND l_tax >= 0 AND l_shipdate <= l_receiptdate) +-- CONSTRAINT c2 CHECK (l_discount >= 0 AND l_discount <= 1) +) engine = MergeTree ORDER BY (l_shipdate, l_receiptdate, l_orderkey, l_linenumber); + +CREATE TABLE nation +( + n_nationkey Int32, -- PK + n_name FixedString(25), + n_regionkey Int32, -- FK r_regionkey + n_comment String, -- variable text, size 152 + CONSTRAINT pk CHECK n_nationkey >= 0 +) Engine = MergeTree ORDER BY (n_nationkey); + +CREATE TABLE region +( + r_regionkey Int32, -- PK + r_name FixedString(25), + r_comment String, -- variable text, size 152 + CONSTRAINT pk CHECK r_regionkey >= 0 +) engine = MergeTree ORDER BY (r_regionkey); + +select 1; +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= toDate('1998-12-01') - interval 90 day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; + +-- 
select 2; -- rewrite fail +-- select +-- s_acctbal, +-- s_name, +-- n_name, +-- p_partkey, +-- p_mfgr, +-- s_address, +-- s_phone, +-- s_comment +-- from +-- part, +-- supplier, +-- partsupp, +-- nation, +-- region +-- where +-- p_partkey = ps_partkey +-- and s_suppkey = ps_suppkey +-- and p_size = 15 +-- and p_type like '%BRASS' +-- and s_nationkey = n_nationkey +-- and n_regionkey = r_regionkey +-- and r_name = 'EUROPE' +-- and ps_supplycost = ( +-- select +-- min(ps_supplycost) +-- from +-- partsupp, +-- supplier, +-- nation, +-- region +-- where +-- p_partkey = ps_partkey +-- and s_suppkey = ps_suppkey +-- and s_nationkey = n_nationkey +-- and n_regionkey = r_regionkey +-- and r_name = 'EUROPE' +-- ) +-- order by +-- s_acctbal desc, +-- n_name, +-- s_name, +-- p_partkey +-- limit 100; + +select 3; +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < toDate('1995-03-15') + and l_shipdate > toDate('1995-03-15') +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10; + +-- select 4; +-- select +-- o_orderpriority, +-- count(*) as order_count +-- from +-- orders +-- where +-- o_orderdate >= toDate('1993-07-01') +-- and o_orderdate < toDate('1993-07-01') + interval '3' month +-- and exists ( +-- select +-- * +-- from +-- lineitem +-- where +-- l_orderkey = o_orderkey +-- and l_commitdate < l_receiptdate +-- ) +-- group by +-- o_orderpriority +-- order by +-- o_orderpriority; + +select 5; +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + 
and r_name = 'ASIA' + and o_orderdate >= toDate('1994-01-01') + and o_orderdate < toDate('1994-01-01') + interval '1' year +group by + n_name +order by + revenue desc; + +select 6; +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= toDate('1994-01-01') + and l_shipdate < toDate('1994-01-01') + interval '1' year + and l_discount between toDecimal32(0.06, 2) - toDecimal32(0.01, 2) + and toDecimal32(0.06, 2) + toDecimal32(0.01, 2) + and l_quantity < 24; + +-- select 7; +-- select +-- supp_nation, +-- cust_nation, +-- l_year, +-- sum(volume) as revenue +-- from +-- ( +-- select +-- n1.n_name as supp_nation, +-- n2.n_name as cust_nation, +-- extract(year from l_shipdate) as l_year, +-- l_extendedprice * (1 - l_discount) as volume +-- from +-- supplier, +-- lineitem, +-- orders, +-- customer, +-- nation n1, +-- nation n2 +-- where +-- s_suppkey = l_suppkey +-- and o_orderkey = l_orderkey +-- and c_custkey = o_custkey +-- and s_nationkey = n1.n_nationkey +-- and c_nationkey = n2.n_nationkey +-- and ( +-- (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') +-- or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') +-- ) +-- and l_shipdate between toDate('1995-01-01') and toDate('1996-12-31') +-- ) as shipping +-- group by +-- supp_nation, +-- cust_nation, +-- l_year +-- order by +-- supp_nation, +-- cust_nation, +-- l_year; + +-- select 8; +-- select +-- o_year, +-- sum(case +-- when nation = 'BRAZIL' then volume +-- else 0 +-- end) / sum(volume) as mkt_share +-- from +-- ( +-- select +-- extract(year from o_orderdate) as o_year, +-- l_extendedprice * (1 - l_discount) as volume, +-- n2.n_name as nation +-- from +-- part, +-- supplier, +-- lineitem, +-- orders, +-- customer, +-- nation n1, +-- nation n2, +-- region +-- where +-- p_partkey = l_partkey +-- and s_suppkey = l_suppkey +-- and l_orderkey = o_orderkey +-- and o_custkey = c_custkey +-- and c_nationkey = n1.n_nationkey +-- and n1.n_regionkey = r_regionkey +-- and r_name = 
'AMERICA' +-- and s_nationkey = n2.n_nationkey +-- and o_orderdate between toDate('1995-01-01') and toDate('1996-12-31') +-- and p_type = 'ECONOMY ANODIZED STEEL' +-- ) as all_nations +-- group by +-- o_year +-- order by +-- o_year; + +select 9; +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; + +select 10; +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= toDate('1993-10-01') + and o_orderdate < toDate('1993-10-01') + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20; + +-- select 11; -- rewrite fail +-- select +-- ps_partkey, +-- sum(ps_supplycost * ps_availqty) as value +-- from +-- partsupp, +-- supplier, +-- nation +-- where +-- ps_suppkey = s_suppkey +-- and s_nationkey = n_nationkey +-- and n_name = 'GERMANY' +-- group by +-- ps_partkey having +-- sum(ps_supplycost * ps_availqty) > ( +-- select +-- sum(ps_supplycost * ps_availqty) * 0.0100000000 +-- -- ^^^^^^^^^^^^ +-- -- The above constant needs to be adjusted according +-- -- to the scale factor (SF): constant = 0.0001 / SF. 
+-- from +-- partsupp, +-- supplier, +-- nation +-- where +-- ps_suppkey = s_suppkey +-- and s_nationkey = n_nationkey +-- and n_name = 'GERMANY' +-- ) +-- order by +-- value desc; + +select 12; +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= toDate('1994-01-01') + and l_receiptdate < toDate('1994-01-01') + interval '1' year +group by + l_shipmode +order by + l_shipmode; + +-- select 13; -- rewrite fail +-- select +-- c_count, +-- count(*) as custdist +-- from +-- ( +-- select +-- c_custkey, +-- count(o_orderkey) +-- from +-- customer left outer join orders on +-- c_custkey = o_custkey +-- and o_comment not like '%special%requests%' +-- group by +-- c_custkey +-- ) as c_orders +-- group by +-- c_count +-- order by +-- custdist desc, +-- c_count desc; + +select 14; +select + toDecimal32(100.00, 2) * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / (1 + sum(l_extendedprice * (1 - l_discount))) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= toDate('1995-09-01') + and l_shipdate < toDate('1995-09-01') + interval '1' month; + +-- select 15; +-- create view revenue0 as +-- select +-- l_suppkey, +-- sum(l_extendedprice * (1 - l_discount)) +-- from +-- lineitem +-- where +-- l_shipdate >= toDate('1996-01-01') +-- and l_shipdate < toDate('1996-01-01') + interval '3' month +-- group by +-- l_suppkey; +-- select +-- s_suppkey, +-- s_name, +-- s_address, +-- s_phone, +-- total_revenue +-- from +-- supplier, +-- revenue0 +-- where +-- s_suppkey = supplier_no +-- and total_revenue = ( 
+-- select +-- max(total_revenue) +-- from +-- revenue0 +-- ) +-- order by +-- s_suppkey; +-- drop view revenue0; + +select 16; +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; + +-- select 17; +-- select +-- sum(l_extendedprice) / 7.0 as avg_yearly +-- from +-- lineitem, +-- part +-- where +-- p_partkey = l_partkey +-- and p_brand = 'Brand#23' +-- and p_container = 'MED BOX' +-- and l_quantity < ( +-- select +-- 0.2 * avg(l_quantity) +-- from +-- lineitem +-- where +-- l_partkey = p_partkey +-- ); + +select 18; +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100; + +select 19; +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and 
l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); + +-- select 20; +-- select +-- s_name, +-- s_address +-- from +-- supplier, +-- nation +-- where +-- s_suppkey in ( +-- select +-- ps_suppkey +-- from +-- partsupp +-- where +-- ps_partkey in ( +-- select +-- p_partkey +-- from +-- part +-- where +-- p_name like 'forest%' +-- ) +-- and ps_availqty > ( +-- select +-- 0.5 * sum(l_quantity) +-- from +-- lineitem +-- where +-- l_partkey = ps_partkey +-- and l_suppkey = ps_suppkey +-- and l_shipdate >= toDate('1994-01-01') +-- and l_shipdate < toDate('1994-01-01') + interval '1' year +-- ) +-- ) +-- and s_nationkey = n_nationkey +-- and n_name = 'CANADA' +-- order by +-- s_name; + +-- select 21; +-- select +-- s_name, +-- count(*) as numwait +-- from +-- supplier, +-- lineitem l1, +-- orders, +-- nation +-- where +-- s_suppkey = l1.l_suppkey +-- and o_orderkey = l1.l_orderkey +-- and o_orderstatus = 'F' +-- and l1.l_receiptdate > l1.l_commitdate +-- and exists ( +-- select +-- * +-- from +-- lineitem l2 +-- where +-- l2.l_orderkey = l1.l_orderkey +-- and l2.l_suppkey <> l1.l_suppkey +-- ) +-- and not exists ( +-- select +-- * +-- from +-- lineitem l3 +-- where +-- l3.l_orderkey = l1.l_orderkey +-- and l3.l_suppkey <> l1.l_suppkey +-- and l3.l_receiptdate > l3.l_commitdate +-- ) +-- and s_nationkey = n_nationkey +-- and n_name = 'SAUDI ARABIA' +-- group by +-- s_name +-- order by +-- numwait desc, +-- s_name +-- limit 100; + +-- select 22; +-- select +-- cntrycode, +-- count(*) as numcust, +-- sum(c_acctbal) as totacctbal +-- from +-- ( +-- select +-- substring(c_phone from 1 for 2) as cntrycode, +-- c_acctbal +-- from +-- customer +-- where +-- 
substring(c_phone from 1 for 2) in +-- ('13', '31', '23', '29', '30', '18', '17') +-- and c_acctbal > ( +-- select +-- avg(c_acctbal) +-- from +-- customer +-- where +-- c_acctbal > 0.00 +-- and substring(c_phone from 1 for 2) in +-- ('13', '31', '23', '29', '30', '18', '17') +-- ) +-- and not exists ( +-- select +-- * +-- from +-- orders +-- where +-- o_custkey = c_custkey +-- ) +-- ) as custsale +-- group by +-- cntrycode +-- order by +-- cntrycode; + +DROP TABLE part; +DROP TABLE supplier; +DROP TABLE partsupp; +DROP TABLE customer; +DROP TABLE orders; +DROP TABLE lineitem; +DROP TABLE nation; +DROP TABLE region; From 87d909119579efa792e41ab96c3f77c1f92a009e Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Sun, 8 Mar 2020 22:22:20 +0900 Subject: [PATCH 205/215] Set can_be_false to true when it applies to the bloom_filter --- .../MergeTreeIndexConditionBloomFilter.cpp | 2 +- ...78_bloom_filter_operator_not_has.reference | 20 ++++++++++++++++ .../01078_bloom_filter_operator_not_has.sql | 23 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.reference create mode 100644 dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.sql diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 01f81c3ce1f..acabb7130cc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -161,7 +161,7 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions); } - rpn_stack.emplace_back(match_rows, !match_rows); + rpn_stack.emplace_back(match_rows, true); if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN) rpn_stack.back() = 
!rpn_stack.back(); } diff --git a/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.reference b/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.reference new file mode 100644 index 00000000000..7e1f383cb21 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.reference @@ -0,0 +1,20 @@ +4 +1 +3 +[] 2020-02-27 +['o','a'] 2020-02-27 +2 +[] 2020-02-27 +['e','a','b'] 2020-02-27 +['o','a'] 2020-02-27 +1 +[] 2020-02-27 +['e','a','b'] 2020-02-27 +['o','a'] 2020-02-27 +['o','a','b','c'] 2020-02-27 +0 +[] 2020-02-27 +['e','a','b'] 2020-02-27 +['e','a','b','c','d'] 2020-02-27 +['o','a'] 2020-02-27 +['o','a','b','c'] 2020-02-27 diff --git a/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.sql b/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.sql new file mode 100644 index 00000000000..20eabdb081f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01078_bloom_filter_operator_not_has.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS bloom_filter_not_has; + +CREATE TABLE bloom_filter_not_has (ary Array(LowCardinality(Nullable(String))), d Date, INDEX idx_ary ary TYPE bloom_filter(0.01) GRANULARITY 1024) ENGINE = MergeTree() PARTITION BY d ORDER BY d; + +INSERT INTO bloom_filter_not_has VALUES ([], '2020-02-27') (['o','a'], '2020-02-27') (['e','a','b'], '2020-02-27'); +INSERT INTO bloom_filter_not_has VALUES (['o','a','b','c'], '2020-02-27') (['e','a','b','c','d'], '2020-02-27'); + +SELECT count() FROM bloom_filter_not_has WHERE has(ary, 'a'); +SELECT count() FROM bloom_filter_not_has WHERE NOT has(ary, 'a'); + +SELECT count() FROM bloom_filter_not_has WHERE has(ary, 'b'); +SELECT * FROM bloom_filter_not_has WHERE NOT has(ary, 'b') ORDER BY ary; + +SELECT count() FROM bloom_filter_not_has WHERE has(ary, 'c'); +SELECT * FROM bloom_filter_not_has WHERE NOT has(ary, 'c') ORDER BY ary; + +SELECT count() FROM bloom_filter_not_has WHERE has(ary, 'd'); +SELECT * FROM bloom_filter_not_has 
WHERE NOT has(ary, 'd') ORDER BY ary; + +SELECT count() FROM bloom_filter_not_has WHERE has(ary, 'f'); +SELECT * FROM bloom_filter_not_has WHERE NOT has(ary, 'f') ORDER BY ary; + +DROP TABLE IF EXISTS bloom_filter_not_has; From bce00a6bf2cc092a3b973a395f42df637843c333 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 8 Mar 2020 17:57:31 +0300 Subject: [PATCH 206/215] Fix parallel tests --- .../01079_parallel_alter_add_drop_column_zookeeper.sh | 2 +- .../0_stateless/01079_parallel_alter_detach_table_zookeeper.sh | 2 +- .../0_stateless/01079_parallel_alter_modify_zookeeper.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/dbms/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 7a71df32c8e..1fbccf47c72 100755 --- a/dbms/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -106,6 +106,6 @@ for i in `seq $REPLICAS`; do $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_$i'" - $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_$i'" + $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_add_drop_$i' and (type = 'ALTER_METADATA' or type = 'MUTATE_PART')" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_add_drop_$i" done diff --git a/dbms/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/dbms/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 
319c65cc5a7..0ec6f01137f 100755 --- a/dbms/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -105,6 +105,6 @@ for i in `seq $REPLICAS`; do $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_alter_detach_$i" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done=0 and table = 'concurrent_alter_detach_$i'" # all mutations have to be done $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE is_done=0 and table = 'concurrent_alter_detach_$i'" # all mutations have to be done - $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_detach_$i'" # all mutations have to be done + $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_detach_$i' and (type = 'ALTER_METADATA' or type = 'MUTATE_PART')" # all mutations and alters have to be done $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_detach_$i" done diff --git a/dbms/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/dbms/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index 31643d21815..5d181727301 100755 --- a/dbms/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -110,6 +110,6 @@ for i in `seq $REPLICAS`; do $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_alter_mt_$i" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done=0 and table = 'concurrent_alter_mt_$i'" # all mutations have to be done $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE is_done=0 and table = 'concurrent_alter_mt_$i'" - $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_mt_$i'" + $CLICKHOUSE_CLIENT --query 
"SELECT * FROM system.replication_queue WHERE table = 'concurrent_alter_mt_$i' and (type = 'ALTER_METADATA' or type = 'MUTATE_PART')" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_alter_mt_$i" done From 0bc86507e5c8f3587593b8ace0e6f606038ba3b2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 8 Mar 2020 18:21:48 +0300 Subject: [PATCH 207/215] Fix error when tests are being run on partially downloaded data --- docker/test/stateful_with_coverage/Dockerfile | 2 +- docker/test/stateful_with_coverage/run.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/test/stateful_with_coverage/Dockerfile b/docker/test/stateful_with_coverage/Dockerfile index 863e55e6326..e5ddf3d3475 100644 --- a/docker/test/stateful_with_coverage/Dockerfile +++ b/docker/test/stateful_with_coverage/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateful-test . +# docker build -t yandex/clickhouse-stateful-test-with-coverage . FROM yandex/clickhouse-stateless-test RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index 0c7e3e50cb9..cf5eb0ee598 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -71,7 +71,11 @@ start_clickhouse sleep 5 -/s3downloader --dataset-names $DATASETS +if ! 
/s3downloader --dataset-names $DATASETS; then + echo "Cannot download datatsets" + exit 1 +fi + chmod 777 -R /var/lib/clickhouse From 49894c7bf2167029e2be733992abcdfe4d542581 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 20:00:08 +0300 Subject: [PATCH 208/215] Update libunwind for WSL --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index 68cffcbbd18..ede00622ff8 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 68cffcbbd1840e14664a5f7f19c5e43f65c525b5 +Subproject commit ede00622ff8ecb1848ed22187eabbfaf8b4e9307 From 75754f309eaf1e2b4296390a54acc2538a3ea4d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 23:28:17 +0300 Subject: [PATCH 209/215] Normalize running time of some queries in performance tests --- dbms/tests/performance/bitCount.xml | 2 +- dbms/tests/performance/date_time.xml | 11 ++---- .../performance/generate_table_function.xml | 38 +++++++++---------- dbms/tests/performance/joins_in_memory.xml | 10 ++--- 4 files changed, 29 insertions(+), 32 deletions(-) diff --git a/dbms/tests/performance/bitCount.xml b/dbms/tests/performance/bitCount.xml index 34fdb24c10b..4519fdfd431 100644 --- a/dbms/tests/performance/bitCount.xml +++ b/dbms/tests/performance/bitCount.xml @@ -21,5 +21,5 @@ - SELECT bitCount({expr}) FROM numbers(1000000) FORMAT Null + SELECT bitCount({expr}) FROM numbers(100000000) FORMAT Null diff --git a/dbms/tests/performance/date_time.xml b/dbms/tests/performance/date_time.xml index 858b20d5784..56af5b85e19 100644 --- a/dbms/tests/performance/date_time.xml +++ b/dbms/tests/performance/date_time.xml @@ -131,11 +131,8 @@ - SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) - - SELECT count() FROM numbers(100000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() 
% 10 AS t, {date_transform}(t)) - - SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) - - SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) diff --git a/dbms/tests/performance/generate_table_function.xml b/dbms/tests/performance/generate_table_function.xml index 7e14a55b5c7..c53ec285bf5 100644 --- a/dbms/tests/performance/generate_table_function.xml +++ b/dbms/tests/performance/generate_table_function.xml @@ -6,23 +6,23 @@ - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM 
generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 10, 10, 1) LIMIT 100000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 10, 10, 1) LIMIT 100000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * 
FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM 
generateRandom('i Array(Nullable(UUID))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 0, 10, 10) LIMIT 10000000); diff --git a/dbms/tests/performance/joins_in_memory.xml b/dbms/tests/performance/joins_in_memory.xml index 1d3b14ae962..31e75984003 100644 --- a/dbms/tests/performance/joins_in_memory.xml +++ b/dbms/tests/performance/joins_in_memory.xml @@ -9,11 +9,11 @@ CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory - INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) - INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) - INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) - INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) - INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(50000) + INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(50000) + INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(50000) + INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(50000) + INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(50000) SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64 WHERE i32 = 200042 SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64,i32,i16,i8 WHERE i32 = 200042 From a542eae3880cdbc1fd450ada42593816b3cf1cad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Mar 2020 23:47:25 +0300 Subject: [PATCH 210/215] Added 
results for Pinebook Pro --- website/benchmark_hardware.html | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index c5801e289f2..67de998f41a 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -2223,6 +2223,57 @@ var results = [0.005, 0.004, 0.004] ] }, + + { + "system": "Pinebook Pro (AArch64, 4 GiB RAM)", + "time": "2020-03-08 00:00:00", + "result": + [ +[0.021, 0.009, 0.007], +[0.195, 0.135, 0.144], +[0.439, 0.264, 0.273], +[1.266, 0.672, 0.706], +[1.337, 0.795, 0.790], +[2.706, 1.989, 1.947], +[0.246, 0.198, 0.197], +[0.157, 0.142, 0.133], +[4.150, 3.769, 3.617], +[5.223, 4.405, 4.234], +[2.391, 1.815, 1.785], +[2.534, 2.158, 2.042], +[7.895, 6.890, 7.003], +[10.338, 9.311, 9.410], +[8.139, 7.441, 7.312], +[8.532, 8.035, 8.011], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null] + ] + } ]; @@ -2654,6 +2705,7 @@ Results for Huawei Taishan are from Peng Gao in sina.com.
Results for Selectel and AMD EPYC 7402P are from Andrey Dudin.
Results for ProLiant are from Denis Ustinov.
Results for AMD EPYC 7502P are from Kostiantyn Velychkovskyi.
+Results for Pinebook Pro are from Aleksey R. @kITerE.
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.
Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.
From 06e00b2018050d4515ad9e3e77aeff0e05d1570b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 9 Mar 2020 07:08:19 +0300 Subject: [PATCH 211/215] Fixed build --- contrib/jemalloc-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 5b420246168..a03d3aa31b4 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -33,6 +33,7 @@ ${JEMALLOC_SOURCE_DIR}/src/test_hooks.c ${JEMALLOC_SOURCE_DIR}/src/ticker.c ${JEMALLOC_SOURCE_DIR}/src/tsd.c ${JEMALLOC_SOURCE_DIR}/src/witness.c +${JEMALLOC_SOURCE_DIR}/src/safety_check.c ) if(OS_DARWIN) From cd5a08c242faf1c479ad138e94723230e19d366e Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 9 Mar 2020 12:23:53 +0800 Subject: [PATCH 212/215] New test cases for Date/DateTime/DateTime64 output to string. --- dbms/src/IO/tests/gtest_DateTimeToString.cpp | 220 +++++++++++++++++++ dbms/src/IO/tests/gtest_rfc1123.cpp | 14 -- 2 files changed, 220 insertions(+), 14 deletions(-) create mode 100644 dbms/src/IO/tests/gtest_DateTimeToString.cpp delete mode 100644 dbms/src/IO/tests/gtest_rfc1123.cpp diff --git a/dbms/src/IO/tests/gtest_DateTimeToString.cpp b/dbms/src/IO/tests/gtest_DateTimeToString.cpp new file mode 100644 index 00000000000..1d68d2a82bd --- /dev/null +++ b/dbms/src/IO/tests/gtest_DateTimeToString.cpp @@ -0,0 +1,220 @@ +#include + +#include +#include +#include + +namespace +{ +using namespace DB; + +struct DateTime64WithScale +{ + DateTime64 value; + UInt32 scale; +}; + +template +auto getTypeName(const ValueType &) +{ + if constexpr (std::is_same_v) + { + return "DayNum"; + } + else if constexpr (std::is_same_v) + { + return "time_t"; + } + else if constexpr (std::is_same_v) + { + return "DateTime64WithScale"; + } + else + { + static_assert("unsupported ValueType"); + } +} + +std::ostream & dump_datetime(std::ostream & ostr, const DayNum & d) +{ + return ostr << 
getTypeName(d) << "{" << d.toUnderType() << "}"; +} + +std::ostream & dump_datetime(std::ostream & ostr, const time_t & dt) +{ + return ostr << getTypeName(dt) << "{" << dt << "}"; +} + +std::ostream & dump_datetime(std::ostream & ostr, const DateTime64WithScale & dt64) +{ + return ostr << getTypeName(dt64) << "{" << dt64.value.value << ", scale: " << dt64.scale << "}"; +} + +template +struct DateTimeToStringParamTestCase +{ + const char* description; + const ValueType input; + const char* expected; + const char* timezone = "UTC"; +}; + +template +std::ostream & operator << (std::ostream & ostr, const DateTimeToStringParamTestCase & test_case) +{ + ostr << "DateTimeToStringParamTestCase<" << getTypeName(test_case.input) << ">{" + << "\n\t\"" << test_case.description << "\"" + << "\n\tinput : "; + dump_datetime(ostr, test_case.input) + << "\n\texpected : " << test_case.expected + << "\n\ttimezone : " << test_case.timezone + << "\n}"; + return ostr; +} + +} + +TEST(DateTimeToStringTest, RFC1123) +{ + using namespace DB; + WriteBufferFromOwnString out; + writeDateTimeTextRFC1123(1111111111, out, DateLUT::instance("UTC")); + ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT"); +} + +template +class DateTimeToStringParamTestBase : public ::testing::TestWithParam> +{ +public: + void Test(const DateTimeToStringParamTestCase & param) + { + [[maybe_unused]] const auto & [description, input, expected, timezone] = param; + + using namespace DB; + WriteBufferFromOwnString out; + + if constexpr (std::is_same_v) + { + writeDateText(input, out); + } + else if constexpr (std::is_same_v) + { + writeDateTimeText(input, out, DateLUT::instance(timezone)); + } + else if constexpr (std::is_same_v) + { + writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone)); + } + else + { + static_assert("unsupported ValueType"); + } + + ASSERT_EQ(expected, out.str()); + } +}; + +class DateTimeToStringParamTestDayNum : public DateTimeToStringParamTestBase +{}; + 
+TEST_P(DateTimeToStringParamTestDayNum, writeDateText) +{ + ASSERT_NO_FATAL_FAILURE(Test(GetParam())); +} + +class DateTimeToStringParamTestTimeT : public DateTimeToStringParamTestBase +{}; + +TEST_P(DateTimeToStringParamTestTimeT, writeDateText) +{ + ASSERT_NO_FATAL_FAILURE(Test(GetParam())); +} + +class DateTimeToStringParamTestDateTime64 : public DateTimeToStringParamTestBase +{}; + +TEST_P(DateTimeToStringParamTestDateTime64, writeDateText) +{ + ASSERT_NO_FATAL_FAILURE(Test(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum, + ::testing::ValuesIn(std::initializer_list> + { + { + "Zero DayNum has special representation of all zeroes despite pointing to 1970-01-01", + DayNum(0), + "0000-00-00" + }, + { + "Non-Zero DayNum", + DayNum(1), + "1970-01-02" + }, + { + "Non-Zero DayNum", + DayNum(10 * 365), + "1979-12-30" + }, + { + "Negative DayNum value wraps as if it was UInt16 due to LUT limitations and to maintain compatibility with existing code.", + DayNum(-10 * 365), + "2106-02-07" + }, + }) +); + +INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestTimeT, + ::testing::ValuesIn(std::initializer_list> + { + { + "Zero time_t has special representation of all-zeroes despite pointing to 1970-01-01 00:00:00", + time_t(0), + "0000-00-00 00:00:00" + }, + { + "Non-Zero time_t is a valid date/time", + time_t(100LL * 365 * 3600 * 24 + 123456), + "2069-12-08 10:17:36" + }, +// { // Negative time_t value produces (expectedly) bogus results, +// // and there is no reliable way to verify output values on all platforms and configurations +// // (since part of stacktrace is printed), so this test case is disabled. 
+// "Negative time_t value wraps as if it was UInt32 due to LUT limitations.", +// time_t(-1LL * 365 * 3600 * 24), +// "2006-03-03 06:28:16" +// }, + }) +); + +INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64, + ::testing::ValuesIn(std::initializer_list> + { + /// Inside basic LUT boundaries + { + "Zero DateTime64 with scale 0 is represented as valid date/time", + DateTime64WithScale{0, 0}, + "0000-00-00 00:00:00." + }, + { + "Zero DateTime64 with scale 3 is repsented as valid data/time", + DateTime64WithScale{0, 3}, + "0000-00-00 00:00:00.000" + }, + { + "Non-Zero DateTime64 with scale 0", + DateTime64WithScale{10 * 365 * 3600 * 24, 0}, + "1979-12-30 00:00:00" + }, + { + "Non-Zero DateTime64 with scale 3", + DateTime64WithScale{10 * 365 * 3600 * 24 * 1000ULL + 123, 3}, + "1979-12-30 00:00:00.123" + }, +// { +// "Negative time_t value wraps around as if it was UInt32 due to LUT limitations and to maintain compatibility with existing code", +// time_t(-10 * 365 * 3600 * 24), +// "1979-12-30 08:00:00" +// }, + }) +); diff --git a/dbms/src/IO/tests/gtest_rfc1123.cpp b/dbms/src/IO/tests/gtest_rfc1123.cpp deleted file mode 100644 index 66d7484de1f..00000000000 --- a/dbms/src/IO/tests/gtest_rfc1123.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include - -#include -#include -#include - - -TEST(RFC1123, Test) -{ - using namespace DB; - WriteBufferFromOwnString out; - writeDateTimeTextRFC1123(1111111111, out, DateLUT::instance("UTC")); - ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT"); -} From 593e0a1ddddb2cafa2cf8c62d5e4f3257f0ade55 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 9 Mar 2020 13:34:22 +0800 Subject: [PATCH 213/215] Zero DateTime64 with zero scale is now same string representation as zero DateTime --- dbms/src/IO/WriteHelpers.h | 3 ++- dbms/src/IO/tests/gtest_DateTimeToString.cpp | 16 +++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 
aaba21a008a..5c040c6ef8c 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -759,7 +759,8 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & // Exactly MaxScale zeroes '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' }; - buf.write(s, sizeof(s) - (MaxScale - scale)); + buf.write(s, sizeof(s) - (MaxScale - scale) + + (scale == 0 ? -1 : 0)); // if scale is zero, also remove the fractional_time_delimiter. return; } auto c = DecimalUtils::split(datetime64, scale); diff --git a/dbms/src/IO/tests/gtest_DateTimeToString.cpp b/dbms/src/IO/tests/gtest_DateTimeToString.cpp index 1d68d2a82bd..74af622ce00 100644 --- a/dbms/src/IO/tests/gtest_DateTimeToString.cpp +++ b/dbms/src/IO/tests/gtest_DateTimeToString.cpp @@ -138,6 +138,8 @@ TEST_P(DateTimeToStringParamTestDateTime64, writeDateText) ASSERT_NO_FATAL_FAILURE(Test(GetParam())); } +static const Int32 NON_ZERO_TIME_T = 10 * 365 * 3600 * 24 + 123456; + INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum, ::testing::ValuesIn(std::initializer_list> { @@ -174,8 +176,8 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestTimeT, }, { "Non-Zero time_t is a valid date/time", - time_t(100LL * 365 * 3600 * 24 + 123456), - "2069-12-08 10:17:36" + time_t{NON_ZERO_TIME_T}, + "1979-12-31 10:17:36" }, // { // Negative time_t value produces (expectedly) bogus results, // // and there is no reliable way to verify output values on all platforms and configurations @@ -194,7 +196,7 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64, { "Zero DateTime64 with scale 0 is represented as valid date/time", DateTime64WithScale{0, 0}, - "0000-00-00 00:00:00." 
+ "0000-00-00 00:00:00" }, { "Zero DateTime64 with scale 3 is repsented as valid data/time", @@ -203,13 +205,13 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64, }, { "Non-Zero DateTime64 with scale 0", - DateTime64WithScale{10 * 365 * 3600 * 24, 0}, - "1979-12-30 00:00:00" + DateTime64WithScale{NON_ZERO_TIME_T, 0}, + "1979-12-31 10:17:36" }, { "Non-Zero DateTime64 with scale 3", - DateTime64WithScale{10 * 365 * 3600 * 24 * 1000ULL + 123, 3}, - "1979-12-30 00:00:00.123" + DateTime64WithScale{NON_ZERO_TIME_T * 1000LL + 123, 3}, + "1979-12-31 10:17:36.123" }, // { // "Negative time_t value wraps around as if it was UInt32 due to LUT limitations and to maintain compatibility with existing code", From 492916d76a7f5d619736089f26e3f20b25ea286a Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 9 Mar 2020 21:36:49 +0800 Subject: [PATCH 214/215] Fixed build on GCC-9 --- dbms/src/IO/tests/gtest_DateTimeToString.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/IO/tests/gtest_DateTimeToString.cpp b/dbms/src/IO/tests/gtest_DateTimeToString.cpp index 74af622ce00..3ffc5a9a354 100644 --- a/dbms/src/IO/tests/gtest_DateTimeToString.cpp +++ b/dbms/src/IO/tests/gtest_DateTimeToString.cpp @@ -88,7 +88,7 @@ class DateTimeToStringParamTestBase : public ::testing::TestWithParam & param) { - [[maybe_unused]] const auto & [description, input, expected, timezone] = param; + [[maybe_unused]] const auto & [description, input, expected, timezone_name] = param; using namespace DB; WriteBufferFromOwnString out; @@ -99,11 +99,11 @@ public: } else if constexpr (std::is_same_v) { - writeDateTimeText(input, out, DateLUT::instance(timezone)); + writeDateTimeText(input, out, DateLUT::instance(timezone_name)); } else if constexpr (std::is_same_v) { - writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone)); + writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name)); } else { @@ 
-194,12 +194,12 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64, { /// Inside basic LUT boundaries { - "Zero DateTime64 with scale 0 is represented as valid date/time", + "Zero DateTime64 with scale 0 string representation matches one of zero time_t", DateTime64WithScale{0, 0}, "0000-00-00 00:00:00" }, { - "Zero DateTime64 with scale 3 is repsented as valid data/time", + "Zero DateTime64 with scale 3 string representation matches one of zero time_t with subsecond part", DateTime64WithScale{0, 3}, "0000-00-00 00:00:00.000" }, From f70d00d28d85670f24ffe811e6bb2643500a9f44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 9 Mar 2020 20:26:18 +0300 Subject: [PATCH 215/215] Removed unused requirement #9569 --- docs/tools/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 4d263a1a10e..c26b72c4409 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -18,7 +18,6 @@ livereload==2.6.1 Markdown==2.6.11 MarkupSafe==1.1.1 mkdocs==1.0.4 -Pygments==2.5.2 python-slugify==1.2.6 pytz==2019.3 PyYAML==5.3