From d5f8e0347ffd37b048d964a3982e12f9356b4fce Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 20:11:11 +0300
Subject: [PATCH 01/23] Better setup of default time zone from configuration file

---
 libs/libdaemon/src/BaseDaemon.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index 5c51b357f8a..807506775ae 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -597,10 +597,12 @@ void BaseDaemon::initialize(Application & self)
     /// This must be done before any usage of DateLUT. In particular, before any logging.
     if (config().has("timezone"))
     {
-        if (0 != setenv("TZ", config().getString("timezone").data(), 1))
+        const std::string timezone = config().getString("timezone");
+        if (0 != setenv("TZ", timezone.data(), 1))
             throw Poco::Exception("Cannot setenv TZ variable");
 
         tzset();
+        DateLUT::setDefaultTimezone(timezone);
     }
 
     std::string log_path = config().getString("logger.log", "");
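The patch above relies on the POSIX contract that `setenv("TZ", ...)` followed by `tzset()` changes the process-local time zone for all subsequent libc time conversions, and additionally pushes the same zone into ClickHouse's own `DateLUT`. A minimal standalone sketch of that contract (not part of the patch series; the hard-coded zone and timestamp simply mirror the integration test added later):

```cpp
#include <stdlib.h>   // setenv
#include <time.h>     // tzset, localtime_r, strftime
#include <iostream>
#include <string>

int main()
{
    // Stand-in for config().getString("timezone") in BaseDaemon::initialize().
    const std::string timezone = "America/Los_Angeles";

    // Same calls and order as the patch: set TZ, then tzset(),
    // and only afterwards perform any time conversion.
    if (0 != setenv("TZ", timezone.data(), 1))
        return 1;
    tzset();

    time_t t = 1111111111;  // the value the integration test checks
    tm result{};
    localtime_r(&t, &result);

    char buf[32];
    strftime(buf, sizeof(buf), "%F %T", &result);
    std::cout << buf << '\n';  // prints: 2005-03-17 17:58:31
}
```

The order matters because a conversion performed before `tzset()` may cache the old zone; the same reasoning is why the patch installs the zone into `DateLUT` before any code can use it.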
From 30cc5698453653aaec363103c6ab971fa45a3d81 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 20:11:40 +0300
Subject: [PATCH 02/23] Avoid initializing DateLUT in static constructors

---
 .../Formats/Impl/ArrowColumnToCHColumn.cpp    | 58 +++++++++----------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 8fb6ab5a359..0aed11a48c9 100644
--- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -13,6 +13,8 @@
 #include
 #include
 #include
+#include <DataTypes/DataTypeFactory.h>
+
 
 namespace DB
 {
@@ -27,34 +29,28 @@ namespace DB
     extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
     extern const int THERE_IS_NO_COLUMN;
 }
-    const std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>> arrow_type_to_internal_type = {
-        //{arrow::Type::DECIMAL, std::make_shared<DataTypeDecimal>()},
-        {arrow::Type::UINT8, std::make_shared<DataTypeUInt8>()},
-        {arrow::Type::INT8, std::make_shared<DataTypeInt8>()},
-        {arrow::Type::UINT16, std::make_shared<DataTypeUInt16>()},
-        {arrow::Type::INT16, std::make_shared<DataTypeInt16>()},
-        {arrow::Type::UINT32, std::make_shared<DataTypeUInt32>()},
-        {arrow::Type::INT32, std::make_shared<DataTypeInt32>()},
-        {arrow::Type::UINT64, std::make_shared<DataTypeUInt64>()},
-        {arrow::Type::INT64, std::make_shared<DataTypeInt64>()},
-        {arrow::Type::HALF_FLOAT, std::make_shared<DataTypeFloat32>()},
-        {arrow::Type::FLOAT, std::make_shared<DataTypeFloat32>()},
-        {arrow::Type::DOUBLE, std::make_shared<DataTypeFloat64>()},
-        {arrow::Type::BOOL, std::make_shared<DataTypeUInt8>()},
-        //{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
-        {arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
-        //{arrow::Type::DATE32, std::make_shared<DataTypeDateTime>()},
-        {arrow::Type::DATE64, std::make_shared<DataTypeDateTime>()},
-        {arrow::Type::TIMESTAMP, std::make_shared<DataTypeDateTime>()},
-        //{arrow::Type::TIME32, std::make_shared<DataTypeDateTime>()},
+    static const std::initializer_list<std::pair<arrow::Type::type, const char *>> arrow_type_to_internal_type =
+    {
+        {arrow::Type::UINT8, "UInt8"},
+        {arrow::Type::INT8, "Int8"},
+        {arrow::Type::UINT16, "UInt16"},
+        {arrow::Type::INT16, "Int16"},
+        {arrow::Type::UINT32, "UInt32"},
+        {arrow::Type::INT32, "Int32"},
+        {arrow::Type::UINT64, "UInt64"},
+        {arrow::Type::INT64, "Int64"},
+        {arrow::Type::HALF_FLOAT, "Float32"},
+        {arrow::Type::FLOAT, "Float32"},
+        {arrow::Type::DOUBLE, "Float64"},
+        {arrow::Type::BOOL, "UInt8"},
+        {arrow::Type::DATE32, "Date"},
+        {arrow::Type::DATE64, "DateTime"},
+        {arrow::Type::TIMESTAMP, "DateTime"},
 
-        {arrow::Type::STRING, std::make_shared<DataTypeString>()},
-        {arrow::Type::BINARY, std::make_shared<DataTypeString>()},
-        //{arrow::Type::FIXED_SIZE_BINARY, std::make_shared<DataTypeString>()},
-        //{arrow::Type::UUID, std::make_shared<DataTypeString>()},
-
+        {arrow::Type::STRING, "String"},
+        {arrow::Type::BINARY, "String"},
 
     // TODO: add other types that are convertable to internal ones:
     // 0. ENUM?
@@ -253,7 +249,7 @@ namespace DB
     void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk &res, std::shared_ptr<arrow::Table> &table,
                                                     arrow::Status &read_status, const Block &header,
                                                     int &row_group_current, const Context &context, std::string format_name)
-    { 
+    {
         Columns columns_list;
         UInt64 num_rows = 0;
@@ -308,14 +304,16 @@
             const auto decimal_type = static_cast<arrow::DecimalType *>(arrow_column->type().get());
             internal_nested_type = std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(),
                                                                                  decimal_type->scale());
-        } else if (arrow_type_to_internal_type.find(arrow_type) != arrow_type_to_internal_type.end())
+        }
+        else if (auto internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(),
+                                                      [=](auto && elem) { return elem.first == arrow_type; });
+                 internal_type_it != arrow_type_to_internal_type.end())
         {
-            internal_nested_type = arrow_type_to_internal_type.at(arrow_type);
+            internal_nested_type = DataTypeFactory::instance().get(internal_type_it->second);
        }
        else
        {
-            throw Exception
-            {
+            throw Exception{
                 "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
                 + "\" is not supported for conversion from a " + format_name + " data format",
                 ErrorCodes::CANNOT_CONVERT_TYPE};
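The change above swaps a `std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>>`, whose constructor and `make_shared` calls all run during static initialization, for a constant-initializable list of pairs that is only searched at call time. A reduced sketch of the same pattern with illustrative stand-in names (`ArrowType`, `lookup` are not ClickHouse APIs):

```cpp
#include <algorithm>
#include <cstdio>
#include <initializer_list>
#include <utility>

enum class ArrowType { UINT8, INT8, STRING };

// Pairs of trivially constructible values: no code has to run before main().
static const std::initializer_list<std::pair<ArrowType, const char *>> arrow_type_to_internal_type =
{
    {ArrowType::UINT8, "UInt8"},
    {ArrowType::INT8, "Int8"},
    {ArrowType::STRING, "String"},
};

static const char * lookup(ArrowType arrow_type)
{
    // Linear search at call time, as in the patch; the list is small, so
    // O(n) is negligible next to the cost of converting a whole column.
    auto it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(),
        [=](auto && elem) { return elem.first == arrow_type; });
    return it != arrow_type_to_internal_type.end() ? it->second : nullptr;
}

int main()
{
    std::printf("%s\n", lookup(ArrowType::INT8));  // prints: Int8
}
```

In the patch the matched name is then passed to `DataTypeFactory::instance().get(...)`, so the heavy `IDataType` object is constructed only on first use instead of before `main()`.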
From cf57a884952b4103ff5f05a5671883bdf53c849c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 20:13:29 +0300
Subject: [PATCH 03/23] Added a way to forbid static initialization of a class

---
 dbms/programs/main.cpp             | 12 ++++++++++++
 libs/libcommon/src/DateLUTImpl.cpp |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp
index 3fbbcee0f15..760eae4298b 100644
--- a/dbms/programs/main.cpp
+++ b/dbms/programs/main.cpp
@@ -21,6 +21,7 @@
 #include
 #include
+#include <ext/scope_guard.h>
 
 
 /// Universal executable for various clickhouse applications
@@ -130,8 +131,19 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
 }
 
 
+/// This allows to implement assert to forbid initialization of a class in static constructors.
+/// Usage:
+///
+/// extern bool inside_main;
+/// class C { C() { assert(inside_main); } };
+bool inside_main = false;
+
+
 int main(int argc_, char ** argv_)
 {
+    inside_main = true;
+    SCOPE_EXIT({ inside_main = false; });
+
     /// Reset new handler to default (that throws std::bad_alloc)
     /// It is needed because LLVM library clobbers it.
     std::set_new_handler(nullptr);

diff --git a/libs/libcommon/src/DateLUTImpl.cpp b/libs/libcommon/src/DateLUTImpl.cpp
index 3f812accb48..d6179373b4b 100644
--- a/libs/libcommon/src/DateLUTImpl.cpp
+++ b/libs/libcommon/src/DateLUTImpl.cpp
@@ -44,9 +44,16 @@ UInt8 getDayOfWeek(const cctz::civil_day & date)
 }
 
 
+__attribute__((__weak__)) extern bool inside_main;
+
 DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
     : time_zone(time_zone_)
 {
+    /// DateLUT should not be initialized in global constructors for the following reasons:
+    /// 1. It is too heavy.
+    if (&inside_main)
+        assert(inside_main);
+
     size_t i = 0;
     time_t start_of_day = DATE_LUT_MIN;
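The trick in `DateLUTImpl.cpp` is the weak declaration of `inside_main`: binaries that link `dbms/programs/main.cpp` (which defines the flag) get the assertion, while other users of libcommon never define the symbol, `&inside_main` is null, and the check is skipped. A condensed single-file sketch of the mechanism, with `HeavyObject` as an illustrative stand-in for `DateLUTImpl` (GCC/Clang only, because of the attribute):

```cpp
#include <cassert>

// As in DateLUTImpl.cpp: a weak declaration, so taking the address is legal
// even in binaries where no translation unit defines the symbol (it is then nullptr).
__attribute__((__weak__)) extern bool inside_main;

struct HeavyObject
{
    HeavyObject()
    {
        if (&inside_main)         // is the symbol defined in this binary?
            assert(inside_main);  // then construction must happen after main() started
    }
};

// As in dbms/programs/main.cpp: the definition, false until main() runs.
bool inside_main = false;

// static HeavyObject too_early;  // uncommenting this would fire the assert

int main()
{
    inside_main = true;
    HeavyObject fine;  // constructed after main() began: the assertion passes
    (void)fine;
}
```

The `SCOPE_EXIT` in the patch also resets the flag when `main()` returns, so a stray construction during static destruction is caught the same way.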
From 509717dea79ca0327303fad4524decff595e8a92 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 20:18:19 +0300
Subject: [PATCH 04/23] Added integration test

---
 .../test_timezone_config/__init__.py          |  0
 .../test_timezone_config/configs/config.xml   |  4 ++++
 .../integration/test_timezone_config/test.py  | 17 +++++++++++++++++
 3 files changed, 21 insertions(+)
 create mode 100644 dbms/tests/integration/test_timezone_config/__init__.py
 create mode 100644 dbms/tests/integration/test_timezone_config/configs/config.xml
 create mode 100644 dbms/tests/integration/test_timezone_config/test.py

diff --git a/dbms/tests/integration/test_timezone_config/__init__.py b/dbms/tests/integration/test_timezone_config/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_timezone_config/configs/config.xml b/dbms/tests/integration/test_timezone_config/configs/config.xml
new file mode 100644
index 00000000000..46c16a7688d
--- /dev/null
+++ b/dbms/tests/integration/test_timezone_config/configs/config.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<yandex>
+    <timezone>America/Los_Angeles</timezone>
+</yandex>
diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py
new file mode 100644
index 00000000000..69f388bd459
--- /dev/null
+++ b/dbms/tests/integration/test_timezone_config/test.py
@@ -0,0 +1,17 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance('node', main_configs=['configs/config.xml'])
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def test_check_client_logs_level(start_cluster):
+    assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n")

From eb15c9416aa2659328f2986d50c212bbe1c4372c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 21:00:40 +0300
Subject: [PATCH 05/23] Fixed style

---
 dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 0aed11a48c9..0cd5ffb03e0 100644
--- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -313,8 +313,7 @@ namespace DB
         }
         else
         {
-            throw Exception{
-                "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
+            throw Exception{"The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
                 + "\" is not supported for conversion from a " + format_name + " data format",
                 ErrorCodes::CANNOT_CONVERT_TYPE};

From 216b05b71b67b14171db477782b570eee16580ff Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Aug 2019 21:33:24 +0300
Subject: [PATCH 06/23] Fixed build

---
 libs/libcommon/src/DateLUTImpl.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/libcommon/src/DateLUTImpl.cpp b/libs/libcommon/src/DateLUTImpl.cpp
index d6179373b4b..51f5ceb759c 100644
--- a/libs/libcommon/src/DateLUTImpl.cpp
+++ b/libs/libcommon/src/DateLUTImpl.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <cassert>
 #include
 
 #define DATE_LUT_MIN 0

From 3fdcc4ab308632e0e0eaefd38727e30680c0d2a4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 30 Aug 2019 16:14:20 +0300
Subject: [PATCH 07/23] Fixed typo in README.md

---
 dbms/tests/integration/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md
index 06819af7668..f608a10deaf 100644
--- a/dbms/tests/integration/README.md
+++ b/dbms/tests/integration/README.md
@@ -34,7 +34,7 @@ set the following environment variables:
 
 ### Running with runner script
 
-The only requirement is fresh docker configured docker.
+The only requirement is fresh configured docker.
 
 Notes:
 * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration.

From 48bf4abdf0b28c57a10ec44ce1492f7828d9612c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 30 Aug 2019 16:14:47 +0300
Subject: [PATCH 08/23] Fixed function name

---
 dbms/tests/integration/test_timezone_config/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py
index 69f388bd459..572c1f17255 100644
--- a/dbms/tests/integration/test_timezone_config/test.py
+++ b/dbms/tests/integration/test_timezone_config/test.py
@@ -13,5 +13,5 @@ def start_cluster():
     finally:
         cluster.shutdown()
 
-def test_check_client_logs_level(start_cluster):
+def test_check_timezone_config(start_cluster):
     assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n")

From da5e7f31c4eb82a0f55624ae7d62652fda6e62f7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 30 Aug 2019 16:25:34 +0300
Subject: [PATCH 09/23] Updated README

---
 dbms/tests/integration/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md
index f608a10deaf..a0a5322e5a6 100644
--- a/dbms/tests/integration/README.md
+++ b/dbms/tests/integration/README.md
@@ -34,7 +34,8 @@ set the following environment variables:
 
 ### Running with runner script
 
-The only requirement is fresh configured docker.
+The only requirement is fresh configured docker and
+docker pull yandex/clickhouse-integration-tests-runner
 
 Notes:
 * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration.
From 4cb640717b935dee4e1d5e877f9ad0c7f25b49b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Aug 2019 16:28:27 +0300 Subject: [PATCH 10/23] Fixed test --- dbms/tests/integration/test_timezone_config/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py index 572c1f17255..22e11daa72e 100644 --- a/dbms/tests/integration/test_timezone_config/test.py +++ b/dbms/tests/integration/test_timezone_config/test.py @@ -14,4 +14,4 @@ def start_cluster(): cluster.shutdown() def test_check_timezone_config(start_cluster): - assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n") + assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n" From d64d75eb6d8cff2096996ec686bdf6d691621fb5 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 2 Sep 2019 17:50:57 +0300 Subject: [PATCH 11/23] Split the integration test `test_dictionaries` into 4 separate tests. --- .../configs/dictionaries/.gitignore | 4 - .../integration/test_dictionaries/test.py | 411 ------------------ .../__init__.py | 0 .../configs/config.xml | 0 .../configs/dictionaries/.gitkeep | 0 .../configs/users.xml | 0 .../dictionary.py | 0 .../external_sources.py | 0 .../fake_cert.pem | 0 .../http_server.py | 0 .../test.py | 0 .../__init__.py | 0 .../configs/config.xml | 0 .../dictionaries/complex_key_cache_string.xml | 0 .../configs/users.xml | 0 .../test.py | 0 .../__init__.py | 0 .../configs/config.xml | 30 ++ .../configs/dictionaries/dep_x.xml} | 0 .../configs/dictionaries/dep_y.xml} | 2 +- .../configs/dictionaries/dep_z.xml} | 0 .../configs/users.xml | 0 .../test.py | 76 ++++ .../test_dictionaries_null_value/__init__.py | 0 .../configs/config.xml | 0 .../configs/dictionaries/cache.xml | 113 +++++ .../configs/users.xml | 23 + .../test_dictionaries_null_value/test.py | 45 ++ .../test_dictionaries_select_all/__init__.py | 0 .../configs/config.xml | 30 ++ .../configs/dictionaries/.gitignore | 3 + .../configs/dictionaries/source.tsv | 0 .../configs/users.xml | 23 + .../generate_dictionaries.py | 10 - .../test_dictionaries_select_all/test.py | 122 ++++++ .../__init__.py | 0 .../configs/config.xml | 30 ++ .../configs/dictionaries/cache_xypairs.xml} | 0 .../configs/dictionaries/executable.xml} | 2 +- .../configs/dictionaries/file.txt} | 0 .../configs/dictionaries/file.xml} | 6 +- .../configs/dictionaries/slow.xml} | 2 +- .../configs/users.xml | 23 + .../test.py | 246 +++++++++++ 44 files changed, 770 insertions(+), 431 deletions(-) delete mode 100644 dbms/tests/integration/test_dictionaries/configs/dictionaries/.gitignore delete mode 100644 dbms/tests/integration/test_dictionaries/test.py rename dbms/tests/integration/{test_cached_dictionary_string => test_dictionaries_all_layouts_and_sources}/__init__.py (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/configs/config.xml (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/configs/dictionaries/.gitkeep (100%) rename dbms/tests/integration/{test_cached_dictionary_string => test_dictionaries_all_layouts_and_sources}/configs/users.xml (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/dictionary.py (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/external_sources.py (100%) 
rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/fake_cert.pem (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/http_server.py (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_all_layouts_and_sources}/test.py (100%) rename dbms/tests/integration/{test_dictionaries => test_dictionaries_complex_key_cache_string}/__init__.py (100%) rename dbms/tests/integration/{test_cached_dictionary_string => test_dictionaries_complex_key_cache_string}/configs/config.xml (100%) rename dbms/tests/integration/{test_cached_dictionary_string => test_dictionaries_complex_key_cache_string}/configs/dictionaries/complex_key_cache_string.xml (100%) rename dbms/tests/integration/{test_dictionaries => test_dictionaries_complex_key_cache_string}/configs/users.xml (100%) rename dbms/tests/integration/{test_cached_dictionary_string => test_dictionaries_complex_key_cache_string}/test.py (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_depend_on_dictionaries}/__init__.py (100%) create mode 100644 dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_dep_x.xml => test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml} (100%) rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_dep_y.xml => test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml} (95%) rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_dep_z.xml => test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml} (100%) rename dbms/tests/integration/{test_external_dictionaries => test_dictionaries_depend_on_dictionaries}/configs/users.xml (100%) create mode 100644 dbms/tests/integration/test_dictionaries_depend_on_dictionaries/test.py create mode 100644 dbms/tests/integration/test_dictionaries_null_value/__init__.py rename dbms/tests/integration/{test_dictionaries => test_dictionaries_null_value}/configs/config.xml (100%) create mode 100644 dbms/tests/integration/test_dictionaries_null_value/configs/dictionaries/cache.xml create mode 100644 dbms/tests/integration/test_dictionaries_null_value/configs/users.xml create mode 100644 dbms/tests/integration/test_dictionaries_null_value/test.py create mode 100644 dbms/tests/integration/test_dictionaries_select_all/__init__.py create mode 100644 dbms/tests/integration/test_dictionaries_select_all/configs/config.xml create mode 100644 dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/.gitignore rename dbms/tests/integration/{test_dictionaries => test_dictionaries_select_all}/configs/dictionaries/source.tsv (100%) create mode 100644 dbms/tests/integration/test_dictionaries_select_all/configs/users.xml rename dbms/tests/integration/{test_dictionaries => test_dictionaries_select_all}/generate_dictionaries.py (96%) create mode 100644 dbms/tests/integration/test_dictionaries_select_all/test.py create mode 100644 dbms/tests/integration/test_dictionaries_update_and_reload/__init__.py create mode 100644 dbms/tests/integration/test_dictionaries_update_and_reload/configs/config.xml rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_cache_xypairs.xml => test_dictionaries_update_and_reload/configs/dictionaries/cache_xypairs.xml} (100%) rename 
dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_cmd.xml => test_dictionaries_update_and_reload/configs/dictionaries/executable.xml} (93%) rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_file.txt => test_dictionaries_update_and_reload/configs/dictionaries/file.txt} (100%) rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_file.xml => test_dictionaries_update_and_reload/configs/dictionaries/file.xml} (82%) rename dbms/tests/integration/{test_dictionaries/configs/dictionaries/dictionary_preset_longload.xml => test_dictionaries_update_and_reload/configs/dictionaries/slow.xml} (94%) create mode 100644 dbms/tests/integration/test_dictionaries_update_and_reload/configs/users.xml create mode 100644 dbms/tests/integration/test_dictionaries_update_and_reload/test.py diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/.gitignore b/dbms/tests/integration/test_dictionaries/configs/dictionaries/.gitignore deleted file mode 100644 index 8f1b0e23a85..00000000000 --- a/dbms/tests/integration/test_dictionaries/configs/dictionaries/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -* -!.gitignore -!source.tsv -!dictionary_preset* \ No newline at end of file diff --git a/dbms/tests/integration/test_dictionaries/test.py b/dbms/tests/integration/test_dictionaries/test.py deleted file mode 100644 index 95f82f65c0d..00000000000 --- a/dbms/tests/integration/test_dictionaries/test.py +++ /dev/null @@ -1,411 +0,0 @@ -import pytest -import os -import time - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV, assert_eq_with_retry -from generate_dictionaries import generate_structure, generate_dictionaries, DictionaryTestTable - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -cluster = None -instance = None -test_table = None - - -def get_status(dictionary_name): - return instance.query("SELECT status FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n") - - -def get_last_exception(dictionary_name): - return instance.query("SELECT last_exception FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n").replace("\\'", "'") - - -def get_loading_start_time(dictionary_name): - s = instance.query("SELECT loading_start_time FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n") - if s == "0000-00-00 00:00:00": - return None - return time.strptime(s, "%Y-%m-%d %H:%M:%S") - - -def get_loading_duration(dictionary_name): - return float(instance.query("SELECT loading_duration FROM system.dictionaries WHERE name='" + dictionary_name + "'")) - - -def replace_in_file_in_container(file_name, what, replace_with): - instance.exec_in_container('sed -i "s/' + what + '/' + replace_with + '/g" ' + file_name) - - -def setup_module(module): - global cluster - global instance - global test_table - - structure = generate_structure() - dictionary_files = generate_dictionaries(os.path.join(SCRIPT_DIR, 'configs/dictionaries'), structure) - - cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - instance = cluster.add_instance('instance', main_configs=dictionary_files) - test_table = DictionaryTestTable(os.path.join(SCRIPT_DIR, 'configs/dictionaries/source.tsv')) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - instance.query("CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary") - test_table.create_clickhouse_source(instance) - for line in 
TSV(instance.query('select name from system.dictionaries')).lines: - print line, - - # Create table `test.small_dict_source` - instance.query(''' - drop table if exists test.small_dict_source; - create table test.small_dict_source (id UInt64, a String, b Int32, c Float64) engine=Log; - insert into test.small_dict_source values (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7); - ''') - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture(params=[ - # name, keys, use_parent - ('clickhouse_hashed', ('id',), True), - ('clickhouse_flat', ('id',), True), - ('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False), - ('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False), - ('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False), -], - ids=['clickhouse_hashed', 'clickhouse_flat', - 'clickhouse_complex_integers_key_hashed', - 'clickhouse_complex_mixed_key_hashed', - 'clickhouse_range_hashed'] -) -def dictionary_structure(started_cluster, request): - return request.param - - -def test_select_all(dictionary_structure): - name, keys, use_parent = dictionary_structure - query = instance.query - - structure = test_table.get_structure_for_keys(keys, use_parent) - query(''' - DROP TABLE IF EXISTS test.{0} - '''.format(name)) - - create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure) - TSV(query(create_query)) - - result = TSV(query('select * from test.{0}'.format(name))) - - diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True) - print test_table.process_diff(diff) - assert not diff - - -@pytest.fixture(params=[ - # name, keys, use_parent - ('clickhouse_cache', ('id',), True), - ('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False), - ('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False) -], - ids=['clickhouse_cache', 'clickhouse_complex_integers_key_cache', 'clickhouse_complex_mixed_key_cache'] -) -def cached_dictionary_structure(started_cluster, request): - return request.param - - -def test_select_all_from_cached(cached_dictionary_structure): - name, keys, use_parent = cached_dictionary_structure - query = instance.query - - structure = test_table.get_structure_for_keys(keys, use_parent) - query(''' - DROP TABLE IF EXISTS test.{0} - '''.format(name)) - - create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure) - TSV(query(create_query)) - - for i in range(4): - result = TSV(query('select * from test.{0}'.format(name))) - diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False) - print test_table.process_diff(diff) - assert not diff - - key = [] - for key_name in keys: - if key_name.endswith('str'): - key.append("'" + str(i) + "'") - else: - key.append(str(i)) - if len(key) == 1: - key = 'toUInt64(' + str(i) + ')' - else: - key = str('(' + ','.join(key) + ')') - query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key)) - - result = TSV(query('select * from test.{0}'.format(name))) - diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True) - print test_table.process_diff(diff) - assert not diff - - -def test_null_value(started_cluster): - query = instance.query - - assert TSV(query("select dictGetUInt8('clickhouse_cache', 'UInt8_', toUInt64(12121212))")) == TSV("1") - assert TSV(query("select dictGetString('clickhouse_cache', 'String_', toUInt64(12121212))")) == TSV("implicit-default") - assert TSV(query("select 
dictGetDate('clickhouse_cache', 'Date_', toUInt64(12121212))")) == TSV("2015-11-25") - - # Check, that empty null_value interprets as default value - assert TSV(query("select dictGetUInt64('clickhouse_cache', 'UInt64_', toUInt64(12121212))")) == TSV("0") - assert TSV(query("select dictGetDateTime('clickhouse_cache', 'DateTime_', toUInt64(12121212))")) == TSV("0000-00-00 00:00:00") - - -def test_dictionary_dependency(started_cluster): - query = instance.query - - # dictionaries_lazy_load == false, so these dictionary are not loaded. - assert get_status('dep_x') == 'NOT_LOADED' - assert get_status('dep_y') == 'NOT_LOADED' - assert get_status('dep_z') == 'NOT_LOADED' - - # Dictionary 'dep_x' depends on 'dep_z', which depends on 'dep_y'. - # So they all should be loaded at once. - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(1))") == "air\n" - assert get_status('dep_x') == 'LOADED' - assert get_status('dep_y') == 'LOADED' - assert get_status('dep_z') == 'LOADED' - - # Other dictionaries should work too. - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(1))") == "air\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(1))") == "air\n" - - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n" - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "YY\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" - - # Update the source table. - query("insert into test.small_dict_source values (3, 'fire', 30, 8)") - - # Wait for dictionaries to be reloaded. - assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(3))", "1", sleep_time = 2, retry_count = 10) - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n" - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" - - # dep_x and dep_z are updated only when there `intDiv(count(), 4)` is changed. - query("insert into test.small_dict_source values (4, 'ether', 404, 0.001)") - assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time = 2, retry_count = 10) - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n" - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n" - - -def test_reload_while_loading(started_cluster): - query = instance.query - - # dictionaries_lazy_load == false, so this dictionary is not loaded. - assert get_status('longload') == "NOT_LOADED" - assert get_loading_duration('longload') == 0 - - # It's not possible to get a value from the dictionary within 1.0 second, so the following query fails by timeout. - assert query("SELECT dictGetInt32('longload', 'a', toUInt64(5))", timeout = 1, ignore_error = True) == "" - - # The dictionary is now loading. - assert get_status('longload') == "LOADING" - start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload') - assert duration > 0 - - time.sleep(0.5) # Still loading. 
- assert get_status('longload') == "LOADING" - prev_start_time, prev_duration = start_time, duration - start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload') - assert start_time == prev_start_time - assert duration >= prev_duration - - # SYSTEM RELOAD DICTIONARY should restart loading. - query("SYSTEM RELOAD DICTIONARY 'longload'") - assert get_status('longload') == "LOADING" - prev_start_time, prev_duration = start_time, duration - start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload') - assert start_time > prev_start_time - assert duration < prev_duration - - time.sleep(0.5) # Still loading. - assert get_status('longload') == "LOADING" - prev_start_time, prev_duration = start_time, duration - start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload') - assert start_time == prev_start_time - assert duration >= prev_duration - - # SYSTEM RELOAD DICTIONARIES should restart loading again. - query("SYSTEM RELOAD DICTIONARIES") - assert get_status('longload') == "LOADING" - prev_start_time, prev_duration = start_time, duration - start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload') - assert start_time > prev_start_time - assert duration < prev_duration - - # Changing the configuration file should restart loading one more time. - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_longload.xml', 'sleep 100', 'sleep 0') - time.sleep(5) # Configuration files are reloaded once in 5 seconds. - - # This time loading should finish quickly. - assert get_status('longload') == "LOADED" - assert query("SELECT dictGetInt32('longload', 'a', toUInt64(5))") == "6\n" - - -def test_reload_after_loading(started_cluster): - query = instance.query - - assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "8\n" - assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n" - - # Change the dictionaries' data. - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '8', '81') - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '10', '101') - - # SYSTEM RELOAD 'name' reloads only the specified dictionary. - query("SYSTEM RELOAD DICTIONARY 'cmd'") - assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "81\n" - assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n" - - query("SYSTEM RELOAD DICTIONARY 'file'") - assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "81\n" - assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "101\n" - - # SYSTEM RELOAD DICTIONARIES reloads all loaded dictionaries. - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '81', '82') - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '101', '102') - query("SYSTEM RELOAD DICTIONARIES") - assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "82\n" - assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "102\n" - - # Configuration files are reloaded and lifetimes are checked automatically once in 5 seconds. 
- replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '82', '83') - replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '102', '103') - time.sleep(5) - assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "103\n" - assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "83\n" - - -def test_reload_after_fail_by_system_reload(started_cluster): - query = instance.query - - # dictionaries_lazy_load == false, so this dictionary is not loaded. - assert get_status("no_file") == "NOT_LOADED" - - # We expect an error because the file source doesn't exist. - expected_error = "No such file" - assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") - assert get_status("no_file") == "FAILED" - - # SYSTEM RELOAD should not change anything now, the status is still FAILED. - query("SYSTEM RELOAD DICTIONARY 'no_file'") - assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") - assert get_status("no_file") == "FAILED" - - # Creating the file source makes the dictionary able to load. - instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/dictionary_preset_file.txt"), "/etc/clickhouse-server/config.d/dictionary_preset_no_file.txt") - query("SYSTEM RELOAD DICTIONARY 'no_file'") - query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n" - assert get_status("no_file") == "LOADED" - - # Removing the file source should not spoil the loaded dictionary. - instance.exec_in_container("rm /etc/clickhouse-server/config.d/dictionary_preset_no_file.txt") - query("SYSTEM RELOAD DICTIONARY 'no_file'") - query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n" - assert get_status("no_file") == "LOADED" - - -def test_reload_after_fail_by_timer(started_cluster): - query = instance.query - - # dictionaries_lazy_load == false, so this dictionary is not loaded. - assert get_status("no_file_2") == "NOT_LOADED" - - # We expect an error because the file source doesn't exist. - expected_error = "No such file" - assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") - assert get_status("no_file_2") == "FAILED" - - # Passed time should not change anything now, the status is still FAILED. - time.sleep(6); - assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") - assert get_status("no_file_2") == "FAILED" - - # Creating the file source makes the dictionary able to load. - instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/dictionary_preset_file.txt"), "/etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt") - time.sleep(6); - query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n" - assert get_status("no_file_2") == "LOADED" - - # Removing the file source should not spoil the loaded dictionary. - instance.exec_in_container("rm /etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt") - time.sleep(6); - query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n" - assert get_status("no_file_2") == "LOADED" - - -def test_reload_after_fail_in_cache_dictionary(started_cluster): - query = instance.query - query_and_get_error = instance.query_and_get_error - - # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. 
- expected_error = "Table test.xypairs doesn't exist" - assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))") - assert get_status("cache_xypairs") == "LOADED" - assert expected_error in get_last_exception("cache_xypairs") - - # Create table `test.xypairs`. - query(''' - drop table if exists test.xypairs; - create table test.xypairs (x UInt64, y UInt64) engine=Log; - insert into test.xypairs values (1, 56), (3, 78); - ''') - - # Cache dictionary now works. - assert_eq_with_retry(instance, "SELECT dictGet('cache_xypairs', 'y', toUInt64(1))", "56", ignore_error=True) - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" - assert get_last_exception("cache_xypairs") == "" - - # Drop table `test.xypairs`. - query('drop table if exists test.xypairs') - - # Values are cached so we can get them. - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56" - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" - assert get_last_exception("cache_xypairs") == "" - - # But we can't get a value from the source table which isn't cached. - assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(3))") - assert expected_error in get_last_exception("cache_xypairs") - - # Passed time should not spoil the cache. - time.sleep(5); - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56" - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" - assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(3))") - assert expected_error in get_last_exception("cache_xypairs") - - # Create table `test.xypairs` again with changed values. - query(''' - drop table if exists test.xypairs; - create table test.xypairs (x UInt64, y UInt64) engine=Log; - insert into test.xypairs values (1, 57), (3, 79); - ''') - - # The cache dictionary returns new values now. 
- assert_eq_with_retry(instance, "SELECT dictGet('cache_xypairs', 'y', toUInt64(1))", "57") - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" - query("SELECT dictGet('cache_xypairs', 'y', toUInt64(3))") == "79" - assert get_last_exception("cache_xypairs") == "" diff --git a/dbms/tests/integration/test_cached_dictionary_string/__init__.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/__init__.py similarity index 100% rename from dbms/tests/integration/test_cached_dictionary_string/__init__.py rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/__init__.py diff --git a/dbms/tests/integration/test_external_dictionaries/configs/config.xml b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/config.xml similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/configs/config.xml rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/config.xml diff --git a/dbms/tests/integration/test_external_dictionaries/configs/dictionaries/.gitkeep b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/dictionaries/.gitkeep similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/configs/dictionaries/.gitkeep rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/dictionaries/.gitkeep diff --git a/dbms/tests/integration/test_cached_dictionary_string/configs/users.xml b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/users.xml similarity index 100% rename from dbms/tests/integration/test_cached_dictionary_string/configs/users.xml rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/configs/users.xml diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/dictionary.py rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/external_sources.py rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py diff --git a/dbms/tests/integration/test_external_dictionaries/fake_cert.pem b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/fake_cert.pem similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/fake_cert.pem rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/fake_cert.pem diff --git a/dbms/tests/integration/test_external_dictionaries/http_server.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/http_server.py similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/http_server.py rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/http_server.py diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/test.py rename to dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py diff --git 
a/dbms/tests/integration/test_dictionaries/__init__.py b/dbms/tests/integration/test_dictionaries_complex_key_cache_string/__init__.py similarity index 100% rename from dbms/tests/integration/test_dictionaries/__init__.py rename to dbms/tests/integration/test_dictionaries_complex_key_cache_string/__init__.py diff --git a/dbms/tests/integration/test_cached_dictionary_string/configs/config.xml b/dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/config.xml similarity index 100% rename from dbms/tests/integration/test_cached_dictionary_string/configs/config.xml rename to dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/config.xml diff --git a/dbms/tests/integration/test_cached_dictionary_string/configs/dictionaries/complex_key_cache_string.xml b/dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/complex_key_cache_string.xml similarity index 100% rename from dbms/tests/integration/test_cached_dictionary_string/configs/dictionaries/complex_key_cache_string.xml rename to dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/dictionaries/complex_key_cache_string.xml diff --git a/dbms/tests/integration/test_dictionaries/configs/users.xml b/dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/users.xml similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/users.xml rename to dbms/tests/integration/test_dictionaries_complex_key_cache_string/configs/users.xml diff --git a/dbms/tests/integration/test_cached_dictionary_string/test.py b/dbms/tests/integration/test_dictionaries_complex_key_cache_string/test.py similarity index 100% rename from dbms/tests/integration/test_cached_dictionary_string/test.py rename to dbms/tests/integration/test_dictionaries_complex_key_cache_string/test.py diff --git a/dbms/tests/integration/test_external_dictionaries/__init__.py b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/__init__.py rename to dbms/tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py diff --git a/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml new file mode 100644 index 00000000000..b60daf72dcf --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml @@ -0,0 +1,30 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_x.xml b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_x.xml rename to dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_y.xml b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml similarity index 95% rename from 
dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_y.xml rename to dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml index 8806c724111..227d87ca92a 100644
--- a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_y.xml
+++ b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml
@@ -8,7 +8,7 @@
             <user>default</user>
             <db>test</db>
-            <table>small_dict_source</table>
+            <table>elements</table>
5 diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_z.xml b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_dep_z.xml rename to dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml diff --git a/dbms/tests/integration/test_external_dictionaries/configs/users.xml b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml similarity index 100% rename from dbms/tests/integration/test_external_dictionaries/configs/users.xml rename to dbms/tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml diff --git a/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/test.py b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/test.py new file mode 100644 index 00000000000..c0ce0af0313 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_depend_on_dictionaries/test.py @@ -0,0 +1,76 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DICTIONARY_FILES = ['configs/dictionaries/dep_x.xml', 'configs/dictionaries/dep_y.xml', 'configs/dictionaries/dep_z.xml'] + +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) +instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + instance.query(''' + CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary; + CREATE DATABASE IF NOT EXISTS test; + DROP TABLE IF EXISTS test.elements; + CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log; + INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7); + ''') + + yield cluster + + finally: + cluster.shutdown() + + +def get_status(dictionary_name): + return instance.query("SELECT status FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n") + + +def test_get_data(started_cluster): + query = instance.query + + # dictionaries_lazy_load == false, so these dictionary are not loaded. + assert get_status('dep_x') == 'NOT_LOADED' + assert get_status('dep_y') == 'NOT_LOADED' + assert get_status('dep_z') == 'NOT_LOADED' + + # Dictionary 'dep_x' depends on 'dep_z', which depends on 'dep_y'. + # So they all should be loaded at once. + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(1))") == "air\n" + assert get_status('dep_x') == 'LOADED' + assert get_status('dep_y') == 'LOADED' + assert get_status('dep_z') == 'LOADED' + + # Other dictionaries should work too. + assert query("SELECT dictGetString('dep_y', 'a', toUInt64(1))") == "air\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(1))") == "air\n" + + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n" + assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "YY\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" + + # Update the source table. + query("INSERT INTO test.elements VALUES (3, 'fire', 30, 8)") + + # Wait for dictionaries to be reloaded. 
+ assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(3))", "1", sleep_time = 2, retry_count = 10) + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n" + assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" + + # dep_x and dep_z are updated only when there `intDiv(count(), 4)` is changed. + query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)") + assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time = 2, retry_count = 10) + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n" + assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "fire\n" + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n" + assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n" diff --git a/dbms/tests/integration/test_dictionaries_null_value/__init__.py b/dbms/tests/integration/test_dictionaries_null_value/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_dictionaries/configs/config.xml b/dbms/tests/integration/test_dictionaries_null_value/configs/config.xml similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/config.xml rename to dbms/tests/integration/test_dictionaries_null_value/configs/config.xml diff --git a/dbms/tests/integration/test_dictionaries_null_value/configs/dictionaries/cache.xml b/dbms/tests/integration/test_dictionaries_null_value/configs/dictionaries/cache.xml new file mode 100644 index 00000000000..9a1ae0732db --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_null_value/configs/dictionaries/cache.xml @@ -0,0 +1,113 @@ + + + cache + + + + localhost + 9000 + default + + test + source
+
+ + + 0 + + + 128 + + + + + id + + + + UInt8_ + UInt8 + 1 + + + + UInt16_ + UInt16 + 1 + + + + UInt32_ + UInt32 + 1 + + + + UInt64_ + UInt64 + + + + + Int8_ + Int8 + -1 + + + + Int16_ + Int16 + -1 + + + + Int32_ + Int32 + -1 + + + + Int64_ + Int64 + -1 + + + + Float32_ + Float32 + 2.71828 + + + + Float64_ + Float64 + 2.71828 + + + + String_ + String + implicit-default + + + + Date_ + Date + 2015-11-25 + + + + DateTime_ + DateTime + + + + + Parent + UInt64 + true + 0 + + +
+
diff --git a/dbms/tests/integration/test_dictionaries_null_value/configs/users.xml b/dbms/tests/integration/test_dictionaries_null_value/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_null_value/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_dictionaries_null_value/test.py b/dbms/tests/integration/test_dictionaries_null_value/test.py new file mode 100644 index 00000000000..e31f397c246 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_null_value/test.py @@ -0,0 +1,45 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DICTIONARY_FILES = ['configs/dictionaries/cache.xml'] + +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) +instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + instance.query(''' + CREATE DATABASE IF NOT EXISTS test; + DROP TABLE IF EXISTS test.source; + CREATE TABLE test.source (id UInt64, key0 UInt8, key0_str String, key1 UInt8, + StartDate Date, EndDate Date, + UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64, + Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64, + Float32_ Float32, Float64_ Float64, + String_ String, + Date_ Date, DateTime_ DateTime, Parent UInt64) ENGINE=Log; + ''') + + yield cluster + + finally: + cluster.shutdown() + + +def test_null_value(started_cluster): + query = instance.query + + assert query("select dictGetUInt8('cache', 'UInt8_', toUInt64(12121212))") == "1\n" + assert query("select dictGetString('cache', 'String_', toUInt64(12121212))") == "implicit-default\n" + assert query("select dictGetDate('cache', 'Date_', toUInt64(12121212))") == "2015-11-25\n" + + # Check, that empty null_value interprets as default value + assert query("select dictGetUInt64('cache', 'UInt64_', toUInt64(12121212))") == "0\n" + assert query("select dictGetDateTime('cache', 'DateTime_', toUInt64(12121212))") == "0000-00-00 00:00:00\n" diff --git a/dbms/tests/integration/test_dictionaries_select_all/__init__.py b/dbms/tests/integration/test_dictionaries_select_all/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_dictionaries_select_all/configs/config.xml b/dbms/tests/integration/test_dictionaries_select_all/configs/config.xml new file mode 100644 index 00000000000..1e4c14585a9 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_select_all/configs/config.xml @@ -0,0 +1,30 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/.gitignore b/dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/.gitignore new file mode 100644 index 00000000000..cc461064a39 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!source.tsv \ No newline at end of file diff --git 
a/dbms/tests/integration/test_dictionaries/configs/dictionaries/source.tsv b/dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/source.tsv similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/source.tsv rename to dbms/tests/integration/test_dictionaries_select_all/configs/dictionaries/source.tsv diff --git a/dbms/tests/integration/test_dictionaries_select_all/configs/users.xml b/dbms/tests/integration/test_dictionaries_select_all/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_select_all/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_dictionaries/generate_dictionaries.py b/dbms/tests/integration/test_dictionaries_select_all/generate_dictionaries.py similarity index 96% rename from dbms/tests/integration/test_dictionaries/generate_dictionaries.py rename to dbms/tests/integration/test_dictionaries_select_all/generate_dictionaries.py index c644bd8f644..30a5648fdbe 100644 --- a/dbms/tests/integration/test_dictionaries/generate_dictionaries.py +++ b/dbms/tests/integration/test_dictionaries_select_all/generate_dictionaries.py @@ -12,13 +12,6 @@ types = [ 'Date', 'DateTime' ] -explicit_defaults = [ - '42', '42', '42', '42', - '-42', '-42', '-42', '-42', - '1.5', '1.6', - "'explicit-default'", - "'2015-01-01'", "'2015-01-01 00:00:00'" -] implicit_defaults = [ '1', '1', '1', '', @@ -182,9 +175,6 @@ def generate_dictionaries(path, structure): file_names = [] - # Add ready dictionaries. - file_names.extend(glob.glob(os.path.join(path, "*dictionary_preset*"))) - # Generate dictionaries. for (name, key_idx, has_parent), (source, layout) in zip(structure, sources_and_layouts): filename = os.path.join(path, 'dictionary_%s.xml' % name) diff --git a/dbms/tests/integration/test_dictionaries_select_all/test.py b/dbms/tests/integration/test_dictionaries_select_all/test.py new file mode 100644 index 00000000000..8bad8a9b214 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_select_all/test.py @@ -0,0 +1,122 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +from generate_dictionaries import generate_structure, generate_dictionaries, DictionaryTestTable + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = None +instance = None +test_table = None + + +def setup_module(module): + global cluster + global instance + global test_table + + structure = generate_structure() + dictionary_files = generate_dictionaries(os.path.join(SCRIPT_DIR, 'configs/dictionaries'), structure) + + cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) + instance = cluster.add_instance('instance', main_configs=dictionary_files) + test_table = DictionaryTestTable(os.path.join(SCRIPT_DIR, 'configs/dictionaries/source.tsv')) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + test_table.create_clickhouse_source(instance) + for line in TSV(instance.query('select name from system.dictionaries')).lines: + print line, + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(params=[ + # name, keys, use_parent + ('clickhouse_hashed', ('id',), True), + ('clickhouse_flat', ('id',), True), + ('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False), + ('clickhouse_complex_mixed_key_hashed', 
('key0_str', 'key1'), False), + ('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False), +], + ids=['clickhouse_hashed', 'clickhouse_flat', + 'clickhouse_complex_integers_key_hashed', + 'clickhouse_complex_mixed_key_hashed', + 'clickhouse_range_hashed'] +) +def dictionary_structure(started_cluster, request): + return request.param + + +def test_select_all(dictionary_structure): + name, keys, use_parent = dictionary_structure + query = instance.query + + structure = test_table.get_structure_for_keys(keys, use_parent) + query(''' + DROP TABLE IF EXISTS test.{0} + '''.format(name)) + + create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure) + TSV(query(create_query)) + + result = TSV(query('select * from test.{0}'.format(name))) + + diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True) + print test_table.process_diff(diff) + assert not diff + + +@pytest.fixture(params=[ + # name, keys, use_parent + ('clickhouse_cache', ('id',), True), + ('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False), + ('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False) +], + ids=['clickhouse_cache', 'clickhouse_complex_integers_key_cache', 'clickhouse_complex_mixed_key_cache'] +) +def cached_dictionary_structure(started_cluster, request): + return request.param + + +def test_select_all_from_cached(cached_dictionary_structure): + name, keys, use_parent = cached_dictionary_structure + query = instance.query + + structure = test_table.get_structure_for_keys(keys, use_parent) + query(''' + DROP TABLE IF EXISTS test.{0} + '''.format(name)) + + create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure) + TSV(query(create_query)) + + for i in range(4): + result = TSV(query('select * from test.{0}'.format(name))) + diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False) + print test_table.process_diff(diff) + assert not diff + + key = [] + for key_name in keys: + if key_name.endswith('str'): + key.append("'" + str(i) + "'") + else: + key.append(str(i)) + if len(key) == 1: + key = 'toUInt64(' + str(i) + ')' + else: + key = str('(' + ','.join(key) + ')') + query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key)) + + result = TSV(query('select * from test.{0}'.format(name))) + diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True) + print test_table.process_diff(diff) + assert not diff diff --git a/dbms/tests/integration/test_dictionaries_update_and_reload/__init__.py b/dbms/tests/integration/test_dictionaries_update_and_reload/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_dictionaries_update_and_reload/configs/config.xml b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/config.xml new file mode 100644 index 00000000000..b60daf72dcf --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/config.xml @@ -0,0 +1,30 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_cache_xypairs.xml 
b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/cache_xypairs.xml similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_cache_xypairs.xml rename to dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/cache_xypairs.xml diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_cmd.xml b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/executable.xml similarity index 93% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_cmd.xml rename to dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/executable.xml index 9f1e259e2d7..519a2915a59 100644 --- a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_cmd.xml +++ b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/executable.xml @@ -1,7 +1,7 @@ - cmd + executable echo '7\t8'; diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_file.txt b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/file.txt similarity index 100% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_file.txt rename to dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/file.txt diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_file.xml b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/file.xml similarity index 82% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_file.xml rename to dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/file.xml index 0e6db1f1637..2a937b5444d 100644 --- a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_file.xml +++ b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/file.xml @@ -4,7 +4,7 @@ file - /etc/clickhouse-server/config.d/dictionary_preset_file.txt + /etc/clickhouse-server/config.d/file.txt TabSeparated @@ -21,7 +21,7 @@ no_file - /etc/clickhouse-server/config.d/dictionary_preset_no_file.txt + /etc/clickhouse-server/config.d/no_file.txt TabSeparated @@ -38,7 +38,7 @@ no_file_2 - /etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt + /etc/clickhouse-server/config.d/no_file_2.txt TabSeparated diff --git a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_longload.xml b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/slow.xml similarity index 94% rename from dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_longload.xml rename to dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/slow.xml index f5d4cdec583..c6814c5fe9c 100644 --- a/dbms/tests/integration/test_dictionaries/configs/dictionaries/dictionary_preset_longload.xml +++ b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/dictionaries/slow.xml @@ -1,7 +1,7 @@ - longload + slow sleep 100 && echo '5\t6'; diff --git a/dbms/tests/integration/test_dictionaries_update_and_reload/configs/users.xml b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ 
b/dbms/tests/integration/test_dictionaries_update_and_reload/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_dictionaries_update_and_reload/test.py b/dbms/tests/integration/test_dictionaries_update_and_reload/test.py new file mode 100644 index 00000000000..b972dc6c918 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_update_and_reload/test.py @@ -0,0 +1,246 @@ +import pytest +import os +import time +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DICTIONARY_FILES = ['configs/dictionaries/cache_xypairs.xml', 'configs/dictionaries/executable.xml', 'configs/dictionaries/file.xml', 'configs/dictionaries/file.txt', 'configs/dictionaries/slow.xml'] + +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) +instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + instance.query("CREATE DATABASE IF NOT EXISTS test") + + yield cluster + + finally: + cluster.shutdown() + + +def get_status(dictionary_name): + return instance.query("SELECT status FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n") + + +def get_last_exception(dictionary_name): + return instance.query("SELECT last_exception FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n").replace("\\'", "'") + + +def get_loading_start_time(dictionary_name): + s = instance.query("SELECT loading_start_time FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n") + if s == "0000-00-00 00:00:00": + return None + return time.strptime(s, "%Y-%m-%d %H:%M:%S") + + +def get_loading_duration(dictionary_name): + return float(instance.query("SELECT loading_duration FROM system.dictionaries WHERE name='" + dictionary_name + "'")) + + +def replace_in_file_in_container(file_name, what, replace_with): + instance.exec_in_container('sed -i "s/' + what + '/' + replace_with + '/g" ' + file_name) + + +def test_reload_while_loading(started_cluster): + query = instance.query + + # dictionaries_lazy_load == false, so this dictionary is not loaded. + assert get_status('slow') == "NOT_LOADED" + assert get_loading_duration('slow') == 0 + + # It's not possible to get a value from the dictionary within 1.0 second, so the following query fails by timeout. + assert query("SELECT dictGetInt32('slow', 'a', toUInt64(5))", timeout = 1, ignore_error = True) == "" + + # The dictionary is now loading. + assert get_status('slow') == "LOADING" + start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow') + assert duration > 0 + + time.sleep(0.5) # Still loading. + assert get_status('slow') == "LOADING" + prev_start_time, prev_duration = start_time, duration + start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow') + assert start_time == prev_start_time + assert duration >= prev_duration + + # SYSTEM RELOAD DICTIONARY should restart loading. + query("SYSTEM RELOAD DICTIONARY 'slow'") + assert get_status('slow') == "LOADING" + prev_start_time, prev_duration = start_time, duration + start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow') + assert start_time > prev_start_time + assert duration < prev_duration + + time.sleep(0.5) # Still loading. 
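+    # While a single load attempt is in progress, loading_start_time stays fixed
+    # and loading_duration keeps growing; the checks below rely on exactly that.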
+ assert get_status('slow') == "LOADING" + prev_start_time, prev_duration = start_time, duration + start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow') + assert start_time == prev_start_time + assert duration >= prev_duration + + # SYSTEM RELOAD DICTIONARIES should restart loading again. + query("SYSTEM RELOAD DICTIONARIES") + assert get_status('slow') == "LOADING" + prev_start_time, prev_duration = start_time, duration + start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow') + assert start_time > prev_start_time + assert duration < prev_duration + + # Changing the configuration file should restart loading one more time. + replace_in_file_in_container('/etc/clickhouse-server/config.d/slow.xml', 'sleep 100', 'sleep 0') + time.sleep(5) # Configuration files are reloaded once in 5 seconds. + + # This time loading should finish quickly. + assert get_status('slow') == "LOADED" + assert query("SELECT dictGetInt32('slow', 'a', toUInt64(5))") == "6\n" + + +def test_reload_after_loading(started_cluster): + query = instance.query + + assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "8\n" + assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n" + + # Change the dictionaries' data. + replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '8', '81') + replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '10', '101') + + # SYSTEM RELOAD 'name' reloads only the specified dictionary. + query("SYSTEM RELOAD DICTIONARY 'executable'") + assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "81\n" + assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n" + + query("SYSTEM RELOAD DICTIONARY 'file'") + assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "81\n" + assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "101\n" + + # SYSTEM RELOAD DICTIONARIES reloads all loaded dictionaries. + replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '81', '82') + replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '101', '102') + query("SYSTEM RELOAD DICTIONARIES") + assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "82\n" + assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "102\n" + + # Configuration files are reloaded and lifetimes are checked automatically once in 5 seconds. + replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '82', '83') + replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '102', '103') + time.sleep(5) + assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "103\n" + assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "83\n" + + +def test_reload_after_fail_by_system_reload(started_cluster): + query = instance.query + + # dictionaries_lazy_load == false, so this dictionary is not loaded. + assert get_status("no_file") == "NOT_LOADED" + + # We expect an error because the file source doesn't exist. + expected_error = "No such file" + assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") + assert get_status("no_file") == "FAILED" + + # SYSTEM RELOAD should not change anything now, the status is still FAILED. 
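+    # (A reload is attempted, but the source file is still missing, so the attempt
+    # fails again and the status remains FAILED.)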
+ query("SYSTEM RELOAD DICTIONARY 'no_file'") + assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") + assert get_status("no_file") == "FAILED" + + # Creating the file source makes the dictionary able to load. + instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/file.txt"), "/etc/clickhouse-server/config.d/no_file.txt") + query("SYSTEM RELOAD DICTIONARY 'no_file'") + query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n" + assert get_status("no_file") == "LOADED" + + # Removing the file source should not spoil the loaded dictionary. + instance.exec_in_container("rm /etc/clickhouse-server/config.d/no_file.txt") + query("SYSTEM RELOAD DICTIONARY 'no_file'") + query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n" + assert get_status("no_file") == "LOADED" + + +def test_reload_after_fail_by_timer(started_cluster): + query = instance.query + + # dictionaries_lazy_load == false, so this dictionary is not loaded. + assert get_status("no_file_2") == "NOT_LOADED" + + # We expect an error because the file source doesn't exist. + expected_error = "No such file" + assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") + assert get_status("no_file_2") == "FAILED" + + # Passed time should not change anything now, the status is still FAILED. + time.sleep(6); + assert expected_error in instance.query_and_get_error("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") + assert get_status("no_file_2") == "FAILED" + + # Creating the file source makes the dictionary able to load. + instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/file.txt"), "/etc/clickhouse-server/config.d/no_file_2.txt") + time.sleep(6); + query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n" + assert get_status("no_file_2") == "LOADED" + + # Removing the file source should not spoil the loaded dictionary. + instance.exec_in_container("rm /etc/clickhouse-server/config.d/no_file_2.txt") + time.sleep(6); + query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n" + assert get_status("no_file_2") == "LOADED" + + +def test_reload_after_fail_in_cache_dictionary(started_cluster): + query = instance.query + query_and_get_error = instance.query_and_get_error + + # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. + expected_error = "Table test.xypairs doesn't exist" + assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))") + assert get_status("cache_xypairs") == "LOADED" + assert expected_error in get_last_exception("cache_xypairs") + + # Create table `test.xypairs`. + query(''' + DROP TABLE IF EXISTS test.xypairs; + CREATE TABLE test.xypairs (x UInt64, y UInt64) ENGINE=Log; + INSERT INTO test.xypairs VALUES (1, 56), (3, 78); + ''') + + # Cache dictionary now works. + assert_eq_with_retry(instance, "SELECT dictGet('cache_xypairs', 'y', toUInt64(1))", "56", ignore_error=True) + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" + assert get_last_exception("cache_xypairs") == "" + + # Drop table `test.xypairs`. + query('DROP TABLE test.xypairs') + + # Values are cached so we can get them. 
+ query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56" + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" + assert get_last_exception("cache_xypairs") == "" + + # But we can't get a value from the source table which isn't cached. + assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(3))") + assert expected_error in get_last_exception("cache_xypairs") + + # Passed time should not spoil the cache. + time.sleep(5); + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56" + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" + assert expected_error in query_and_get_error("SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(3))") + assert expected_error in get_last_exception("cache_xypairs") + + # Create table `test.xypairs` again with changed values. + query(''' + CREATE TABLE test.xypairs (x UInt64, y UInt64) ENGINE=Log; + INSERT INTO test.xypairs VALUES (1, 57), (3, 79); + ''') + + # The cache dictionary returns new values now. + assert_eq_with_retry(instance, "SELECT dictGet('cache_xypairs', 'y', toUInt64(1))", "57") + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(2))") == "0" + query("SELECT dictGet('cache_xypairs', 'y', toUInt64(3))") == "79" + assert get_last_exception("cache_xypairs") == "" From 184e2ef9ab837c9586c09090d6635b7547a9a27e Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 2 Sep 2019 18:49:25 +0300 Subject: [PATCH 12/23] CLICKHOUSE-4644 arrayEnumerateUniqRanked: fix for empty arrays --- dbms/src/Functions/array/arrayEnumerateRanked.h | 4 +++- .../0_stateless/00909_arrayEnumerateUniq.reference | 6 ++++++ .../queries/0_stateless/00909_arrayEnumerateUniq.sql | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/array/arrayEnumerateRanked.h b/dbms/src/Functions/array/arrayEnumerateRanked.h index ab46af2266d..a1019ba83bf 100644 --- a/dbms/src/Functions/array/arrayEnumerateRanked.h +++ b/dbms/src/Functions/array/arrayEnumerateRanked.h @@ -336,7 +336,9 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( /// Skipping offsets if no data in this array if (prev_off == off) { - want_clear = true; + + if (depth_to_look > 2) + want_clear = true; if (depth_to_look >= 2) { diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference index f97d393cc32..595dcdf3803 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference @@ -278,3 +278,9 @@ a1,a2 12 [1,2] 1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1] 1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1] 1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1] +-- empty +[[1],[],[2]] +[[1],[],[2]] +[[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8],[],[9]] +[[],[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8]] +[[1],[2],[],[3]] diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql index 5f4b12e1988..9cf82a368d6 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql @@ -305,3 +305,11 @@ ARRAY JOIN Test.PuidVal AS PuidValArr; DROP TABLE arr_tests_visits; + + +select '-- empty'; +SELECT arrayEnumerateUniqRanked([['a'], [], ['a']]); +SELECT arrayEnumerateUniqRanked([[1], [], [1]]); +SELECT arrayEnumerateUniqRanked([[1], [], [1], [], [1], [], [1], [], [1], [], 
[1], [], [1], [], [1], [], [1]]); +SELECT arrayEnumerateUniqRanked([[], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1]]); +SELECT arrayEnumerateUniqRanked([[1], [1], [], [1]]); From 5af8c196bd88ab9e9845ac1d6038498daee72156 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 2 Sep 2019 19:31:09 +0300 Subject: [PATCH 13/23] Move initialization of MergedColumnOnlyStream to constructor --- .../MergedColumnOnlyOutputStream.cpp | 45 ++++++++----------- .../MergeTree/MergedColumnOnlyOutputStream.h | 1 - ...d_empty_part_single_column_write.reference | 2 + ...6_simpod_empty_part_single_column_write.sh | 41 +++++++++++++++++ 4 files changed, 61 insertions(+), 28 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.reference create mode 100755 dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh diff --git a/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index ba0f78bf3e6..a9e8f24d588 100644 --- a/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -21,34 +21,26 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( header(header_), sync(sync_), skip_offsets(skip_offsets_), already_written_offset_columns(already_written_offset_columns_) { + serialization_states.reserve(header.columns()); + WrittenOffsetColumns tmp_offset_columns; + IDataType::SerializeBinaryBulkSettings settings; + + for (const auto & column_name : header.getNames()) + { + const auto & col = header.getByName(column_name); + + const auto columns = storage.getColumns(); + addStreams(part_path, col.name, *col.type, columns.getCodecOrDefault(col.name, codec), 0, skip_offsets); + serialization_states.emplace_back(nullptr); + settings.getter = createStreamGetter(col.name, tmp_offset_columns, false); + col.type->serializeBinaryBulkStatePrefix(settings, serialization_states.back()); + } + + initSkipIndices(); } void MergedColumnOnlyOutputStream::write(const Block & block) { - if (!initialized) - { - column_streams.clear(); - serialization_states.clear(); - serialization_states.reserve(header.columns()); - WrittenOffsetColumns tmp_offset_columns; - IDataType::SerializeBinaryBulkSettings settings; - - for (const auto & column_name : header.getNames()) - { - const auto & col = block.getByName(column_name); - - const auto columns = storage.getColumns(); - addStreams(part_path, col.name, *col.type, columns.getCodecOrDefault(col.name, codec), 0, skip_offsets); - serialization_states.emplace_back(nullptr); - settings.getter = createStreamGetter(col.name, tmp_offset_columns, false); - col.type->serializeBinaryBulkStatePrefix(settings, serialization_states.back()); - } - - initSkipIndices(); - - initialized = true; - } - std::set skip_indexes_column_names_set; for (const auto & index : skip_indices) std::copy(index->columns.cbegin(), index->columns.cend(), @@ -68,7 +60,6 @@ void MergedColumnOnlyOutputStream::write(const Block & block) if (!rows) return; - size_t new_index_offset = 0; size_t new_current_mark = 0; WrittenOffsetColumns offset_columns = already_written_offset_columns; @@ -106,7 +97,8 @@ MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndG serialize_settings.getter = createStreamGetter(column.name, already_written_offset_columns, skip_offsets); column.type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[i]); - if 
(with_final_mark) + /// We wrote at least one row + if (with_final_mark && (index_offset != 0 || current_mark != 0)) writeFinalMark(column.name, column.type, offset_columns, skip_offsets, serialize_settings.path); } @@ -125,7 +117,6 @@ MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndG column_streams.clear(); serialization_states.clear(); - initialized = false; return checksums; } diff --git a/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index 8970bf19565..f19c970ac41 100644 --- a/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -28,7 +28,6 @@ public: private: Block header; - bool initialized = false; bool sync; bool skip_offsets; diff --git a/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.reference b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh new file mode 100755 index 00000000000..0c5eed30f66 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. $CURDIR/mergetree_mutations.lib + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_with_empty_part" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part +( + id UInt64, + value UInt64 +) +ENGINE = MergeTree() +ORDER BY id +PARTITION BY id +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0 +" + + +${CLICKHOUSE_CLIENT} --query="INSERT INTO table_with_empty_part VALUES (1, 1)" + +${CLICKHOUSE_CLIENT} --query="INSERT INTO table_with_empty_part VALUES (2, 2)" + +${CLICKHOUSE_CLIENT} --query="ALTER TABLE table_with_empty_part DELETE WHERE id % 2 == 0" + +mutation_id=`${CLICKHOUSE_CLIENT} --query="SELECT mutation_id FROM system.mutations WHERE table='table_with_empty_part' LIMIT 1"` + +wait_for_mutation "table_with_empty_part" "$mutation_id" + +${CLICKHOUSE_CLIENT} --query="SELECT COUNT(DISTINCT value) FROM table_with_empty_part" + +${CLICKHOUSE_CLIENT} --query="ALTER TABLE table_with_empty_part MODIFY COLUMN value Nullable(UInt64)" + +${CLICKHOUSE_CLIENT} --query="SELECT COUNT(distinct value) FROM table_with_empty_part" + +${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE table_with_empty_part FINAL" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_with_empty_part" From 257df66b025887f9b5c90b1cc13e1dd8ba72a787 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 2 Sep 2019 21:29:56 +0300 Subject: [PATCH 14/23] fix flappy ttl test --- dbms/tests/queries/0_stateless/00933_ttl_with_default.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00933_ttl_with_default.sql b/dbms/tests/queries/0_stateless/00933_ttl_with_default.sql index e6c0a6e700c..d3f3b62126c 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_with_default.sql +++ b/dbms/tests/queries/0_stateless/00933_ttl_with_default.sql @@ -5,6 +5,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 
1);
 insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4);
+select sleep(0.7) format Null; -- wait in case a very fast merge happens
 optimize table ttl_00933_2 final;
 select a from ttl_00933_2 order by a;
 
@@ -15,6 +16,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 100);
 insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 200);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 300);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 400);
+select sleep(0.7) format Null; -- wait in case a very fast merge happens
 optimize table ttl_00933_2 final;
 select a, b from ttl_00933_2 order by a;
 
@@ -25,6 +27,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 5);
 insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 10);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 15);
 insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 20);
+select sleep(0.7) format Null; -- wait in case a very fast merge happens
 optimize table ttl_00933_2 final;
 select a, b from ttl_00933_2 order by a;

From 9eb1a9bae6bc22bb8379d1c0df49f79822f735e5 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 2 Sep 2019 21:53:16 +0300
Subject: [PATCH 15/23] fix links in docs

---
 docs/en/operations/settings/settings.md | 2 +-
 docs/ru/operations/settings/settings.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 3457041f386..bac149330be 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -74,7 +74,7 @@ If `force_primary_key=1`, ClickHouse checks to see if the query has a primary ke
 
 ## format_schema
 
-This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template-template). The value depends on the format.
+This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template). The value depends on the format.
 
 ## fsync_metadata
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index fe4e609bd2a..831f5958c29 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -72,7 +72,7 @@ ClickHouse применяет настройку в тех случаях, ко
 
 ## format_schema
 
-Параметр применяется в том случае, когда используются форматы, требующие определения схемы, например [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) или [Template](../../interfaces/formats.md#template-template). Значение параметра зависит от формата.
+Параметр применяется в том случае, когда используются форматы, требующие определения схемы, например [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) или [Template](../../interfaces/formats.md#template). Значение параметра зависит от формата.
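[Editor's aside, not part of the patch: for a schema-bound format the value typically names a schema file, and for Protobuf also a message type, e.g. format_schema='schemafile:MessageType'; the file name is illustrative only.]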
## fsync_metadata From 2b0af524cbe8f5221618069aeb2b0a304395992d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Sep 2019 22:13:12 +0300 Subject: [PATCH 16/23] Added a test #2282 --- ...008_materialized_view_henyihanwobushi.reference | 1 + .../01008_materialized_view_henyihanwobushi.sql | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.reference create mode 100644 dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.sql diff --git a/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.reference b/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.reference new file mode 100644 index 00000000000..2f3b99134b8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.reference @@ -0,0 +1 @@ +0000-00-00 1 bar_n_1 1 diff --git a/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.sql b/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.sql new file mode 100644 index 00000000000..8deec159dca --- /dev/null +++ b/dbms/tests/queries/0_stateless/01008_materialized_view_henyihanwobushi.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS bar; +DROP TABLE IF EXISTS view_foo_bar; + +create table foo (ddate Date, id Int64, n String) ENGINE = ReplacingMergeTree(ddate, (id), 8192); +create table bar (ddate Date, id Int64, n String, foo_id Int64) ENGINE = ReplacingMergeTree(ddate, (id), 8192); +insert into bar (id, n, foo_id) values (1, 'bar_n_1', 1); +create MATERIALIZED view view_foo_bar ENGINE = ReplacingMergeTree(ddate, (bar_id), 8192) as select ddate, bar_id, bar_n, foo_id, foo_n from (select ddate, id as bar_id, n as bar_n, foo_id from bar) any left join (select id as foo_id, n as foo_n from foo) using foo_id; +insert into bar (id, n, foo_id) values (1, 'bar_n_1', 1); +SELECT * FROM view_foo_bar; + +DROP TABLE foo; +DROP TABLE bar; +DROP TABLE view_foo_bar; From 04a3db58578c2d937a7095da02cb89af7a6cad0f Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 2 Sep 2019 22:32:08 +0300 Subject: [PATCH 17/23] Better test --- .../01006_simpod_empty_part_single_column_write.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 0c5eed30f66..58efd0f8bb8 100755 --- a/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/dbms/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -26,7 +26,9 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO table_with_empty_part VALUES (2, 2)" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE table_with_empty_part DELETE WHERE id % 2 == 0" -mutation_id=`${CLICKHOUSE_CLIENT} --query="SELECT mutation_id FROM system.mutations WHERE table='table_with_empty_part' LIMIT 1"` +sleep 0.5 + +mutation_id=`${CLICKHOUSE_CLIENT} --query="SELECT max(mutation_id) FROM system.mutations WHERE table='table_with_empty_part'"` wait_for_mutation "table_with_empty_part" "$mutation_id" From 31372d5f6b50a0577fa0cbeae0750e3d2858f101 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 2 Sep 2019 22:48:13 +0300 Subject: [PATCH 18/23] explicitly specify en language --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md 
b/docs/en/operations/settings/settings.md index bac149330be..9c711073177 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -74,7 +74,7 @@ If `force_primary_key=1`, ClickHouse checks to see if the query has a primary ke ## format_schema -This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template). The value depends on the format. +This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](https://clickhouse.yandex/docs/en/interfaces/formats/#template). The value depends on the format. ## fsync_metadata From 89e86e1d41734f3612524c54422dc0e1cbb1bd9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Sep 2019 22:57:09 +0300 Subject: [PATCH 19/23] Slightly better config management in integration tests --- ...common_instance_config.xml => 0_common_instance_config.xml} | 0 dbms/tests/integration/helpers/cluster.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename dbms/tests/integration/helpers/{common_instance_config.xml => 0_common_instance_config.xml} (100%) diff --git a/dbms/tests/integration/helpers/common_instance_config.xml b/dbms/tests/integration/helpers/0_common_instance_config.xml similarity index 100% rename from dbms/tests/integration/helpers/common_instance_config.xml rename to dbms/tests/integration/helpers/0_common_instance_config.xml diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 1288aaa23f2..aadd2e70a52 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -723,7 +723,8 @@ class ClickHouseInstance: os.mkdir(config_d_dir) os.mkdir(users_d_dir) - shutil.copy(p.join(HELPERS_DIR, 'common_instance_config.xml'), config_d_dir) + # The file is named with 0_ prefix to be processed before other configuration overloads. 
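+        # (Editor's note, an assumption inferred from the comment above: files in
+        # config.d are merged in lexicographic order, so the 0_ prefix sorts this
+        # file first and lets later, test-specific files override its settings.)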
+ shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), config_d_dir) # Generate and write macros file macros = self.macros.copy() From fce1b4725006abb349dfd103a68e4c1c5391f871 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 2 Sep 2019 22:58:45 +0300 Subject: [PATCH 20/23] more JOIN refactoring: move logic out of ExpressionAnalyzer/ExpressionAction --- dbms/src/Interpreters/AnalyzedJoin.cpp | 122 ++++++++++++++++++- dbms/src/Interpreters/AnalyzedJoin.h | 36 ++++-- dbms/src/Interpreters/ExpressionActions.cpp | 79 ++---------- dbms/src/Interpreters/ExpressionActions.h | 10 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 66 +++------- dbms/src/Interpreters/ExpressionAnalyzer.h | 5 +- dbms/src/Interpreters/Join.cpp | 13 +- dbms/src/Interpreters/Join.h | 8 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 48 +++----- dbms/src/Interpreters/SyntaxAnalyzer.h | 6 +- 10 files changed, 216 insertions(+), 177 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 6a3b9b8ac1b..36b573c4093 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -1,13 +1,18 @@ #include #include #include +#include #include #include #include +#include #include +#include + + namespace DB { @@ -26,7 +31,6 @@ void AnalyzedJoin::addUsingKey(const ASTPtr & ast) void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) { - with_using = false; key_names_left.push_back(left_table_ast->getColumnName()); key_names_right.push_back(right_table_ast->getAliasOrColumnName()); @@ -37,7 +41,7 @@ void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) /// @return how many times right key appears in ON section. size_t AnalyzedJoin::rightKeyInclusion(const String & name) const { - if (with_using) + if (hasUsing()) return 0; size_t count = 0; @@ -101,6 +105,120 @@ std::unordered_map AnalyzedJoin::getOriginalColumnsMap(const Nam return out; } +ASTPtr AnalyzedJoin::leftKeysList() const +{ + ASTPtr keys_list = std::make_shared(); + keys_list->children = key_asts_left; + return keys_list; +} + +ASTPtr AnalyzedJoin::rightKeysList() const +{ + ASTPtr keys_list = std::make_shared(); + if (hasOn()) + keys_list->children = key_asts_right; + return keys_list; +} + +Names AnalyzedJoin::requiredJoinedNames() const +{ + NameSet required_columns_set(key_names_right.begin(), key_names_right.end()); + for (const auto & joined_column : columns_added_by_join) + required_columns_set.insert(joined_column.name); + + return Names(required_columns_set.begin(), required_columns_set.end()); +} + +void AnalyzedJoin::appendRequiredColumns(const Block & sample, NameSet & required_columns) const +{ + for (auto & column : key_names_right) + if (!sample.has(column)) + required_columns.insert(column); + + for (auto & column : columns_added_by_join) + if (!sample.has(column.name)) + required_columns.insert(column.name); +} + +void AnalyzedJoin::addJoinedColumn(const NameAndTypePair & joined_column) +{ + if (join_use_nulls && isLeftOrFull(table_join.kind)) + { + auto type = joined_column.type->canBeInsideNullable() ? 
makeNullable(joined_column.type) : joined_column.type; + columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type))); + } + else + columns_added_by_join.push_back(joined_column); +} + +void AnalyzedJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const +{ + bool right_or_full_join = isRightOrFull(table_join.kind); + bool left_or_full_join = isLeftOrFull(table_join.kind); + + for (auto & col : sample_block) + { + /// Materialize column. + /// Column is not empty if it is constant, but after Join all constants will be materialized. + /// So, we need remove constants from header. + if (col.column) + col.column = nullptr; + + bool make_nullable = join_use_nulls && right_or_full_join; + + if (make_nullable && col.type->canBeInsideNullable()) + col.type = makeNullable(col.type); + } + + for (const auto & col : columns_added_by_join) + { + auto res_type = col.type; + + bool make_nullable = join_use_nulls && left_or_full_join; + + if (!make_nullable) + { + /// Keys from right table are usually not stored in Join, but copied from the left one. + /// So, if left key is nullable, let's make right key nullable too. + /// Note: for some join types it's not needed and, probably, may be removed. + /// Note: changing this code, take into account the implementation in Join.cpp. + auto it = std::find(key_names_right.begin(), key_names_right.end(), col.name); + if (it != key_names_right.end()) + { + auto pos = it - key_names_right.begin(); + const auto & left_key_name = key_names_left[pos]; + make_nullable = sample_block.getByName(left_key_name).type->isNullable(); + } + } + + if (make_nullable && res_type->canBeInsideNullable()) + res_type = makeNullable(res_type); + + sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name)); + } +} + +bool AnalyzedJoin::sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y) +{ + if (!x && !y) + return true; + if (!x || !y) + return false; + + return x->table_join.kind == y->table_join.kind + && x->table_join.strictness == y->table_join.strictness + && x->key_names_left == y->key_names_left + && x->key_names_right == y->key_names_right + && x->columns_added_by_join == y->columns_added_by_join; +} + +JoinPtr AnalyzedJoin::makeHashJoin(const Block & sample_block, const SizeLimits & size_limits_for_join) const +{ + auto join = std::make_shared(key_names_right, join_use_nulls, size_limits_for_join, table_join.kind, table_join.strictness); + join->setSampleBlock(sample_block); + return join; +} + NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context) { NamesAndTypesList names_and_type_list; diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 1ce11da95e0..34fbede0d89 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include #include #include @@ -13,6 +14,10 @@ namespace DB class Context; class ASTSelectQuery; struct DatabaseAndTableWithAlias; +class Block; + +class Join; +using JoinPtr = std::shared_ptr; struct AnalyzedJoin { @@ -30,18 +35,19 @@ struct AnalyzedJoin private: friend class SyntaxAnalyzer; - friend struct SyntaxAnalyzerResult; - friend class ExpressionAnalyzer; - friend class SelectQueryExpressionAnalyzer; Names key_names_left; Names key_names_right; /// Duplicating names are qualified. 
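    /// (Editor's note) key_names_* and key_asts_* are parallel arrays: entry i of
    /// each describes the i-th key pair registered via addUsingKey()/addOnKeys().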
 ASTs key_asts_left;
 ASTs key_asts_right;
-    bool with_using = true;
+    ASTTableJoin table_join;
+    bool join_use_nulls = false;
 
     /// All columns which can be read from joined table. Duplicating names are qualified.
     NamesAndTypesList columns_from_joined_table;
+    /// Columns will be added to block by JOIN. It's a subset of columns_from_joined_table with corrected Nullability.
+    NamesAndTypesList columns_added_by_join;
+
     /// Name -> original name. Names are the same as in columns_from_joined_table list.
     std::unordered_map original_names;
     /// Original name -> name. Only renamed columns.
@@ -51,8 +57,8 @@ public:
     void addUsingKey(const ASTPtr & ast);
     void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);
 
-    bool hasUsing() const { return with_using; }
-    bool hasOn() const { return !with_using; }
+    bool hasUsing() const { return table_join.using_expression_list != nullptr; }
+    bool hasOn() const { return !hasUsing(); }
 
     NameSet getQualifiedColumnsSet() const;
     NameSet getOriginalColumnsSet() const;
@@ -60,6 +66,22 @@ public:
     void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix);
 
     size_t rightKeyInclusion(const String & name) const;
+
+    void appendRequiredColumns(const Block & sample, NameSet & required_columns) const;
+    void addJoinedColumn(const NameAndTypePair & joined_column);
+    void addJoinedColumnsAndCorrectNullability(Block & sample_block) const;
+
+    ASTPtr leftKeysList() const;
+    ASTPtr rightKeysList() const; /// For ON syntax only
+
+    Names requiredJoinedNames() const;
+    const Names & keyNamesLeft() const { return key_names_left; }
+    const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; }
+    const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; }
+
+    JoinPtr makeHashJoin(const Block & sample_block, const SizeLimits & size_limits_for_join) const;
+
+    static bool sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y);
 };
 
 struct ASTTableExpression;
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp
index 5248350b99b..160f9d68672 100644
--- a/dbms/src/Interpreters/ExpressionActions.cpp
+++ b/dbms/src/Interpreters/ExpressionActions.cpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
 
 
 namespace ProfileEvents
@@ -45,7 +44,8 @@ Names ExpressionAction::getNeededColumns() const
 
     res.insert(res.end(), array_joined_columns.begin(), array_joined_columns.end());
 
-    res.insert(res.end(), join_key_names_left.begin(), join_key_names_left.end());
+    if (join_params)
+        res.insert(res.end(), join_params->keyNamesLeft().begin(), join_params->keyNamesLeft().end());
 
     for (const auto & column : projection)
         res.push_back(column.first);
@@ -159,20 +159,12 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column
     return a;
 }
 
-ExpressionAction ExpressionAction::ordinaryJoin(
-    const ASTTableJoin & join_params,
-    std::shared_ptr join_,
-    const Names & join_key_names_left,
-    const Names & join_key_names_right,
-    const NamesAndTypesList & columns_added_by_join_)
+ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join_params, std::shared_ptr hash_join)
 {
     ExpressionAction a;
     a.type = JOIN;
-    a.join = std::move(join_);
-    a.join_kind = join_params.kind;
-    a.join_key_names_left = join_key_names_left;
-    a.join_key_names_right = join_key_names_right;
-    a.columns_added_by_join = columns_added_by_join_;
+    a.join_params = join_params;
+    a.join = hash_join;
     return a;
 }
 
@@ -277,51 +269,7 @@ void 
ExpressionAction::prepare(Block & sample_block, const Settings & settings, case JOIN: { - bool is_null_used_as_default = settings.join_use_nulls; - bool right_or_full_join = isRightOrFull(join_kind); - bool left_or_full_join = isLeftOrFull(join_kind); - - for (auto & col : sample_block) - { - /// Materialize column. - /// Column is not empty if it is constant, but after Join all constants will be materialized. - /// So, we need remove constants from header. - if (col.column) - col.column = nullptr; - - bool make_nullable = is_null_used_as_default && right_or_full_join; - - if (make_nullable && col.type->canBeInsideNullable()) - col.type = makeNullable(col.type); - } - - for (const auto & col : columns_added_by_join) - { - auto res_type = col.type; - - bool make_nullable = is_null_used_as_default && left_or_full_join; - - if (!make_nullable) - { - /// Keys from right table are usually not stored in Join, but copied from the left one. - /// So, if left key is nullable, let's make right key nullable too. - /// Note: for some join types it's not needed and, probably, may be removed. - /// Note: changing this code, take into account the implementation in Join.cpp. - auto it = std::find(join_key_names_right.begin(), join_key_names_right.end(), col.name); - if (it != join_key_names_right.end()) - { - auto pos = it - join_key_names_right.begin(); - const auto & left_key_name = join_key_names_left[pos]; - make_nullable = sample_block.getByName(left_key_name).type->isNullable(); - } - } - - if (make_nullable && res_type->canBeInsideNullable()) - res_type = makeNullable(res_type); - - sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name)); - } - + join_params->addJoinedColumnsAndCorrectNullability(sample_block); break; } @@ -527,7 +475,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const case JOIN: { - join->joinBlock(block, join_key_names_left, columns_added_by_join); + join->joinBlock(block, *join_params); break; } @@ -645,9 +593,10 @@ std::string ExpressionAction::toString() const case JOIN: ss << "JOIN "; - for (NamesAndTypesList::const_iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it) + for (NamesAndTypesList::const_iterator it = join_params->columnsAddedByJoin().begin(); + it != join_params->columnsAddedByJoin().end(); ++it) { - if (it != columns_added_by_join.begin()) + if (it != join_params->columnsAddedByJoin().begin()) ss << ", "; ss << it->name; } @@ -1220,7 +1169,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh for (const auto & action : actions) if (action.join && isRightOrFull(action.join->getKind())) return action.join->createStreamWithNonJoinedRows( - source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size); + source_header, *action.join_params, max_block_size); return {}; } @@ -1267,7 +1216,7 @@ UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action hash.update(col); break; case JOIN: - for (const auto & col : action.columns_added_by_join) + for (const auto & col : action.join_params->columnsAddedByJoin()) hash.update(col.name); break; case PROJECT: @@ -1326,9 +1275,7 @@ bool ExpressionAction::operator==(const ExpressionAction & other) const && array_joined_columns == other.array_joined_columns && array_join_is_left == other.array_join_is_left && join == other.join - && join_key_names_left == other.join_key_names_left - && join_key_names_right == other.join_key_names_right - && columns_added_by_join == 
other.columns_added_by_join + && AnalyzedJoin::sameJoin(join_params.get(), other.join_params.get()) && projection == other.projection && is_function_compiled == other.is_function_compiled; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 62d50131b45..90638d86368 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "config_core.h" #include @@ -104,11 +105,8 @@ public: bool unaligned_array_join = false; /// For JOIN + std::shared_ptr join_params = nullptr; std::shared_ptr join; - ASTTableJoin::Kind join_kind; - Names join_key_names_left; - Names join_key_names_right; - NamesAndTypesList columns_added_by_join; /// For PROJECT. NamesWithAliases projection; @@ -124,9 +122,7 @@ public: static ExpressionAction project(const Names & projected_columns_); static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); - static ExpressionAction ordinaryJoin(const ASTTableJoin & join_params, std::shared_ptr join_, - const Names & join_key_names_left, const Names & join_key_names_right, - const NamesAndTypesList & columns_added_by_join_); + static ExpressionAction ordinaryJoin(std::shared_ptr join_params, std::shared_ptr hash_join); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 2d0b3c2729a..e452d62ffca 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -134,14 +133,8 @@ void ExpressionAnalyzer::analyzeAggregation() const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { - const auto & table_join = join->table_join->as(); - if (table_join.using_expression_list) - getRootActions(table_join.using_expression_list, true, temp_actions); - if (table_join.on_expression) - for (const auto & key_ast : analyzedJoin().key_asts_left) - getRootActions(key_ast, true, temp_actions); - - addJoinAction(table_join, temp_actions); + getRootActions(analyzedJoin().leftKeysList(), true, temp_actions); + addJoinAction(temp_actions); } } @@ -298,7 +291,8 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) { NamesAndTypesList temp_columns = sourceColumns(); temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end()); - temp_columns.insert(temp_columns.end(), columnsAddedByJoin().begin(), columnsAddedByJoin().end()); + temp_columns.insert(temp_columns.end(), + analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end()); ExpressionActionsPtr temp_actions = std::make_shared(temp_columns, context); getRootActions(left_in_operand, true, temp_actions); @@ -412,22 +406,10 @@ bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & cha return true; } -static void appendRequiredColumns( - NameSet & required_columns, const Block & sample, const Names & key_names_right, const NamesAndTypesList & columns_added_by_join) -{ - for (auto & column : key_names_right) - if (!sample.has(column)) - required_columns.insert(column); - - for (auto & column : columns_added_by_join) - if (!sample.has(column.name)) - 
required_columns.insert(column.name); -} - /// It's possible to set nullptr as join for only_types mode -void ExpressionAnalyzer::addJoinAction(const ASTTableJoin & join_params, ExpressionActionsPtr & actions, JoinPtr join) const +void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const { - actions->add(ExpressionAction::ordinaryJoin(join_params, std::move(join), analyzedJoin().key_names_left, analyzedJoin().key_names_right, columnsAddedByJoin())); + actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join)); } bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -438,16 +420,11 @@ bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, b SubqueryForSet & subquery_for_set = getSubqueryForJoin(*ast_join); - ASTPtr left_keys_list = std::make_shared(); - left_keys_list->children = analyzedJoin().key_asts_left; - initChain(chain, sourceColumns()); ExpressionActionsChain::Step & step = chain.steps.back(); - auto & join_params = ast_join->table_join->as(); - - getRootActions(left_keys_list, only_types, step.actions); - addJoinAction(join_params, step.actions, subquery_for_set.join); + getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions); + addJoinAction(step.actions, subquery_for_set.join); return true; } @@ -524,11 +501,9 @@ void SelectQueryExpressionAnalyzer::makeHashJoin(const ASTTablesInSelectQueryEle Names action_columns = joined_block_actions->getRequiredColumns(); NameSet required_columns(action_columns.begin(), action_columns.end()); - auto & analyzed_join = analyzedJoin(); - appendRequiredColumns( - required_columns, joined_block_actions->getSampleBlock(), analyzed_join.key_names_right, columnsAddedByJoin()); + analyzedJoin().appendRequiredColumns(joined_block_actions->getSampleBlock(), required_columns); - auto original_map = analyzed_join.getOriginalColumnsMap(required_columns); + auto original_map = analyzedJoin().getOriginalColumnsMap(required_columns); Names original_columns; for (auto & pr : original_map) original_columns.push_back(pr.second); @@ -542,29 +517,16 @@ void SelectQueryExpressionAnalyzer::makeHashJoin(const ASTTablesInSelectQueryEle joined_block_actions->execute(sample_block); /// TODO You do not need to set this up when JOIN is only needed on remote servers. - auto & join_params = join_element.table_join->as(); - subquery_for_set.join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, - settings.size_limits_for_join, join_params.kind, join_params.strictness); - subquery_for_set.join->setSampleBlock(sample_block); + subquery_for_set.join = analyzedJoin().makeHashJoin(sample_block, settings.size_limits_for_join); subquery_for_set.joined_block_actions = joined_block_actions; } ExpressionActionsPtr SelectQueryExpressionAnalyzer::createJoinedBlockActions() const { - /// Create custom expression list with join keys from right table. 
-    ASTPtr expression_list = std::make_shared<ASTExpressionList>();
-    ASTs & children = expression_list->children;
+    ASTPtr expression_list = analyzedJoin().rightKeysList();
+    Names required_columns = analyzedJoin().requiredJoinedNames();

-    if (analyzedJoin().hasOn())
-        for (const auto & join_right_key : analyzedJoin().key_asts_right)
-            children.emplace_back(join_right_key);
-
-    NameSet required_columns_set(analyzedJoin().key_names_right.begin(), analyzedJoin().key_names_right.end());
-    for (const auto & joined_column : columnsAddedByJoin())
-        required_columns_set.insert(joined_column.name);
-    Names required_columns(required_columns_set.begin(), required_columns_set.end());
-
-    auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, analyzedJoin().columns_from_joined_table, required_columns);
+    auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, analyzedJoin().columnsFromJoinedTable(), required_columns);
     return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false);
 }

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index 2eafe4b85f0..a28f54210b2 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -121,9 +121,8 @@ protected:
     SyntaxAnalyzerResultPtr syntax;

     const StoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
-    const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
+    const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
     const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
-    const NamesAndTypesList & columnsAddedByJoin() const { return syntax->columns_added_by_join; }
     const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }

     /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
@@ -131,7 +130,7 @@ protected:

     void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;

-    void addJoinAction(const ASTTableJoin & join_params, ExpressionActionsPtr & actions, JoinPtr join = {}) const;
+    void addJoinAction(ExpressionActionsPtr & actions, JoinPtr join = {}) const;

     void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);

diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 63bf88a8437..855b0d284e1 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <Interpreters/AnalyzedJoin.h>
 #include
 #include
@@ -1048,8 +1049,11 @@ void Join::joinGet(Block & block, const String & column_name) const
 }

-void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const
+void Join::joinBlock(Block & block, const AnalyzedJoin & join_params) const
 {
+    const Names & key_names_left = join_params.keyNamesLeft();
+    const NamesAndTypesList & columns_added_by_join = join_params.columnsAddedByJoin();
+
     std::shared_lock lock(rwlock);

     checkTypesOfKeys(block, key_names_left, sample_block_with_keys);
@@ -1457,10 +1461,11 @@ private:
 };

-BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left,
-    const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size) const
+BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const AnalyzedJoin & join_params,
+    UInt64 max_block_size) const
 {
-    return std::make_shared(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size);
+    return std::make_shared(*this, left_sample_block,
+        join_params.keyNamesLeft(), join_params.columnsAddedByJoin(), max_block_size);
 }

diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h
index 4756a5680ef..1a85481cf39 100644
--- a/dbms/src/Interpreters/Join.h
+++ b/dbms/src/Interpreters/Join.h
@@ -26,6 +26,8 @@
 namespace DB
 {

+struct AnalyzedJoin;
+
 namespace JoinStuff
 {
@@ -141,7 +143,7 @@ public:
     /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table.
       * Could be called from different threads in parallel.
       */
-    void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const;
+    void joinBlock(Block & block, const AnalyzedJoin & join_params) const;

     /// Infer the return type for joinGet function
     DataTypePtr joinGetReturnType(const String & column_name) const;
@@ -161,8 +163,8 @@ public:
       * Use only after all calls to joinBlock were done.
       * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
       */
-    BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left,
-        const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size) const;
+    BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const AnalyzedJoin & join_params,
+        UInt64 max_block_size) const;

     /// Number of keys in all built JOIN maps.
    size_t getTotalRowCount() const;

diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
index 0c73beeef16..1298d1733fb 100644
--- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp
+++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
@@ -489,14 +489,13 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
     }
 }

-void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, ASTTableJoin::Kind & join_kind)
+void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, ASTTableJoin & out_table_join)
 {
     const ASTTablesInSelectQueryElement * node = select_query.join();
     if (!node)
         return;

     auto & table_join = const_cast<ASTTablesInSelectQueryElement *>(node)->table_join->as<ASTTableJoin &>();
-    join_kind = table_join.kind;

     if (table_join.strictness == ASTTableJoin::Strictness::Unspecified &&
         table_join.kind != ASTTableJoin::Kind::Cross)
@@ -509,6 +508,8 @@
         throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty",
                         DB::ErrorCodes::EXPECTED_ALL_OR_ANY);
     }
+
+    out_table_join = table_join;
 }

 /// Find the columns that are obtained by JOIN.
@@ -609,8 +610,7 @@ std::vector<const ASTFunction *> getAggregates(const ASTPtr & query)
 /// Calculate which columns are required to execute the expression.
 /// Then, delete all other columns from the list of available columns.
 /// After execution, columns will only contain the list of columns needed to read from the table.
-void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns,
-    bool make_joined_columns_nullable)
+void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns)
 {
     /// We calculate required_source_columns with source_columns modifications and swap them on exit
     required_source_columns = source_columns;
@@ -637,8 +637,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
         avaliable_columns.insert(name.name);

     /// Add columns obtained by JOIN (if needed).
-    columns_added_by_join.clear();
-    for (const auto & joined_column : analyzed_join.columns_from_joined_table)
+    for (const auto & joined_column : analyzed_join->columnsFromJoinedTable())
     {
         auto & name = joined_column.name;
         if (avaliable_columns.count(name))
             continue;

         if (required.count(name))
         {
             /// Optimisation: do not add columns needed only in JOIN ON section.
-            if (columns_context.nameInclusion(name) > analyzed_join.rightKeyInclusion(name))
-            {
-                if (make_joined_columns_nullable)
-                {
-                    auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
-                    columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
-                }
-                else
-                    columns_added_by_join.push_back(joined_column);
-            }
+            if (columns_context.nameInclusion(name) > analyzed_join->rightKeyInclusion(name))
+                analyzed_join->addJoinedColumn(joined_column);
+
             required.erase(name);
         }
     }
@@ -766,7 +758,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
         if (columns_context.has_table_join)
         {
             ss << ", joined columns:";
-            for (const auto & column : analyzed_join.columns_from_joined_table)
+            for (const auto & column : analyzed_join->columnsFromJoinedTable())
                 ss << " '" << column.name << "'";
         }
@@ -798,15 +790,17 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
         storage = context.tryGetTable(db_and_table->database, db_and_table->table);
     }

+    const auto & settings = context.getSettingsRef();
+
     SyntaxAnalyzerResult result;
     result.storage = storage;
     result.source_columns = source_columns_;
+    result.analyzed_join = std::make_shared<AnalyzedJoin>(); /// TODO: move to select_query logic
+    result.analyzed_join->join_use_nulls = settings.join_use_nulls;

     collectSourceColumns(select_query, result.storage, result.source_columns);
     NameSet source_columns_set = removeDuplicateColumns(result.source_columns);

-    const auto & settings = context.getSettingsRef();
-
     Names source_columns_list;
     source_columns_list.reserve(result.source_columns.size());
     for (const auto & type_name : result.source_columns)
@@ -831,13 +825,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
         const auto & joined_expression = node->table_expression->as<ASTTableExpression &>();
         DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());

-        result.analyzed_join.columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context);
-        result.analyzed_join.deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix());
+        result.analyzed_join->columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context);
+        result.analyzed_join->deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix());
     }

     translateQualifiedNames(query, *select_query, context,
                             (storage ? storage->getColumns().getOrdinary().getNames() : source_columns_list),
                             source_columns_set,
-                            result.analyzed_join.getQualifiedColumnsSet());
+                            result.analyzed_join->getQualifiedColumnsSet());

     /// Rewrite IN and/or JOIN for distributed tables according to distributed_product_mode setting.
     InJoinSubqueriesPreprocessor(context).visit(query);
@@ -872,7 +866,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     /// Optimize if with constant condition after constants was substituted instead of scalar subqueries.
     OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);

-    bool make_joined_columns_nullable = false;
     if (select_query)
     {
         /// GROUP BY injective function elimination.
@@ -893,15 +886,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
         /// Push the predicate expression down to the subqueries.
        result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();

-        ASTTableJoin::Kind join_kind = ASTTableJoin::Kind::Comma;
-        setJoinStrictness(*select_query, settings.join_default_strictness, join_kind);
-        make_joined_columns_nullable = settings.join_use_nulls && isLeftOrFull(join_kind);
-
-        collectJoinedColumns(result.analyzed_join, *select_query, source_columns_set, result.aliases);
+        setJoinStrictness(*select_query, settings.join_default_strictness, result.analyzed_join->table_join);
+        collectJoinedColumns(*result.analyzed_join, *select_query, source_columns_set, result.aliases);
     }

     result.aggregates = getAggregates(query);
-    result.collectUsedColumns(query, additional_source_columns, make_joined_columns_nullable);
+    result.collectUsedColumns(query, additional_source_columns);

     return std::make_shared(result);
 }

diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h
index e95d7354e8a..a2187ee2ef0 100644
--- a/dbms/src/Interpreters/SyntaxAnalyzer.h
+++ b/dbms/src/Interpreters/SyntaxAnalyzer.h
@@ -15,13 +15,11 @@ class ASTFunction;

 struct SyntaxAnalyzerResult
 {
     StoragePtr storage;
-    AnalyzedJoin analyzed_join;
+    std::shared_ptr<AnalyzedJoin> analyzed_join;

     NamesAndTypesList source_columns;
     /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
     NamesAndTypesList required_source_columns;
-    /// Columns will be added to block by JOIN. It's a subset of analyzed_join.columns_from_joined_table with corrected Nullability
-    NamesAndTypesList columns_added_by_join;

     Aliases aliases;
     std::vector<const ASTFunction *> aggregates;
@@ -42,7 +40,7 @@ struct SyntaxAnalyzerResult
     /// Predicate optimizer overrides the sub queries
     bool rewrite_subqueries = false;

-    void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns, bool make_joined_columns_nullable);
+    void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns);
     Names requiredSourceColumns() const { return required_source_columns.getNames(); }
 };

From 1903053aa1a848134ad8efc5e16429279430ccb8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 2 Sep 2019 22:58:48 +0300
Subject: [PATCH 21/23] Fixed error in test

---
 dbms/tests/integration/test_timezone_config/configs/config.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/integration/test_timezone_config/configs/config.xml b/dbms/tests/integration/test_timezone_config/configs/config.xml
index 46c16a7688d..c601a1d09ef 100644
--- a/dbms/tests/integration/test_timezone_config/configs/config.xml
+++ b/dbms/tests/integration/test_timezone_config/configs/config.xml
@@ -1,4 +1,4 @@
-
- America/Los_Angeles
+
+ America/Los_Angeles

From 9ec91df9646fcf4403143bae8fde6bcd74ba1e7d Mon Sep 17 00:00:00 2001
From: Denis Zhuravlev
Date: Mon, 2 Sep 2019 17:10:20 -0300
Subject: [PATCH 22/23] Update other_functions.md

Example of max_block_size impact on runningDifference.
+ runningDifferenceStartingWithFirstValue description
---
 .../functions/other_functions.md | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md
index fdc46a0d4ee..44bf76b7047 100644
--- a/docs/ru/query_language/functions/other_functions.md
+++ b/docs/ru/query_language/functions/other_functions.md
@@ -369,6 +369,39 @@ FROM
└─────────┴─────────────────────┴───────┘
```

Note that the block size affects the result. The `runningDifference` state is reset with each new block.

``` sql
SELECT
    number,
    runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1

┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
┌─number─┬─diff─┐
│  65536 │    0 │
└────────┴──────┘

set max_block_size=100000 -- default is 65536!

SELECT
    number,
    runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1

┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
```

## runningDifferenceStartingWithFirstValue
Same as [runningDifference](./other_functions.md#other_functions-runningdifference), but the first row returns the value of the first row itself instead of zero.

## MACNumToString(num)
Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form).

From d7763e132e8bd21854adc9ed3dfd8158bbf6adfc Mon Sep 17 00:00:00 2001
From: Denis Zhuravlev
Date: Mon, 2 Sep 2019 17:15:40 -0300
Subject: [PATCH 23/23] Update other_functions.md

Example of max_block_size impact on runningDifference.

---
 .../functions/other_functions.md | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md
index 349397059af..7a883bb0464 100644
--- a/docs/en/query_language/functions/other_functions.md
+++ b/docs/en/query_language/functions/other_functions.md
@@ -394,6 +394,33 @@ FROM
└─────────┴─────────────────────┴───────┘
```

Please note: the block size affects the result. With each new block, the `runningDifference` state is reset.

``` sql
SELECT
    number,
    runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1

┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
┌─number─┬─diff─┐
│  65536 │    0 │
└────────┴──────┘

set max_block_size=100000 -- default value is 65536!

SELECT
    number,
    runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1

┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
```

## runningDifferenceStartingWithFirstValue
Same as [runningDifference](./other_functions.md#other_functions-runningdifference), except that the first row returns the value of the first row itself rather than zero; each subsequent row returns the difference from the previous row.
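
To make the contrast between the two functions concrete, here is a small illustrative query with its expected output; it is a sketch rather than part of the patch above, and it assumes all five rows arrive in a single block (so neither function's state is reset mid-query).

``` sql
SELECT
    number,
    runningDifference(number * 2 + 10) AS diff,                        -- first row is always 0
    runningDifferenceStartingWithFirstValue(number * 2 + 10) AS diff0  -- first row is the value itself
FROM numbers(5)

┌─number─┬─diff─┬─diff0─┐
│      0 │    0 │    10 │
│      1 │    2 │     2 │
│      2 │    2 │     2 │
│      3 │    2 │     2 │
│      4 │    2 │     2 │
└────────┴──────┴───────┘
```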