From d5f8e0347ffd37b048d964a3982e12f9356b4fce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 20:11:11 +0300 Subject: [PATCH 01/12] Better setup of default time zone from configuration file --- libs/libdaemon/src/BaseDaemon.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 5c51b357f8a..807506775ae 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -597,10 +597,12 @@ void BaseDaemon::initialize(Application & self) /// This must be done before any usage of DateLUT. In particular, before any logging. if (config().has("timezone")) { - if (0 != setenv("TZ", config().getString("timezone").data(), 1)) + const std::string timezone = config().getString("timezone"); + if (0 != setenv("TZ", timezone.data(), 1)) throw Poco::Exception("Cannot setenv TZ variable"); tzset(); + DateLUT::setDefaultTimezone(timezone); } std::string log_path = config().getString("logger.log", ""); From 30cc5698453653aaec363103c6ab971fa45a3d81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 20:11:40 +0300 Subject: [PATCH 02/12] Avoid initializing DateLUT in static constructors --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 8fb6ab5a359..0aed11a48c9 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -13,6 +13,8 @@ #include #include #include +#include + namespace DB { @@ -27,34 +29,28 @@ namespace DB extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; extern const int THERE_IS_NO_COLUMN; } - const std::unordered_map> arrow_type_to_internal_type = { - //{arrow::Type::DECIMAL, std::make_shared()}, - {arrow::Type::UINT8, std::make_shared()}, - {arrow::Type::INT8, std::make_shared()}, - {arrow::Type::UINT16, std::make_shared()}, - {arrow::Type::INT16, std::make_shared()}, - {arrow::Type::UINT32, std::make_shared()}, - {arrow::Type::INT32, std::make_shared()}, - {arrow::Type::UINT64, std::make_shared()}, - {arrow::Type::INT64, std::make_shared()}, - {arrow::Type::HALF_FLOAT, std::make_shared()}, - {arrow::Type::FLOAT, std::make_shared()}, - {arrow::Type::DOUBLE, std::make_shared()}, - {arrow::Type::BOOL, std::make_shared()}, - //{arrow::Type::DATE32, std::make_shared()}, - {arrow::Type::DATE32, std::make_shared()}, - //{arrow::Type::DATE32, std::make_shared()}, - {arrow::Type::DATE64, std::make_shared()}, - {arrow::Type::TIMESTAMP, std::make_shared()}, - //{arrow::Type::TIME32, std::make_shared()}, + static const std::initializer_list> arrow_type_to_internal_type = + { + {arrow::Type::UINT8, "UInt8"}, + {arrow::Type::INT8, "Int8"}, + {arrow::Type::UINT16, "UInt16"}, + {arrow::Type::INT16, "Int16"}, + {arrow::Type::UINT32, "UInt32"}, + {arrow::Type::INT32, "Int32"}, + {arrow::Type::UINT64, "UInt64"}, + {arrow::Type::INT64, "Int64"}, + {arrow::Type::HALF_FLOAT, "Float32"}, + {arrow::Type::FLOAT, "Float32"}, + {arrow::Type::DOUBLE, "Float64"}, + {arrow::Type::BOOL, "UInt8"}, + {arrow::Type::DATE32, "Date"}, + {arrow::Type::DATE64, "DateTime"}, + {arrow::Type::TIMESTAMP, "DateTime"}, - {arrow::Type::STRING, std::make_shared()}, - {arrow::Type::BINARY, std::make_shared()}, - //{arrow::Type::FIXED_SIZE_BINARY, std::make_shared()}, - //{arrow::Type::UUID, std::make_shared()}, - + {arrow::Type::STRING, "String"}, + {arrow::Type::BINARY, "String"}, // TODO: add other types that are convertable to internal ones: // 0. ENUM? @@ -253,7 +249,7 @@ namespace DB void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk &res, std::shared_ptr &table, arrow::Status &read_status, const Block &header, int &row_group_current, const Context &context, std::string format_name) - { + { Columns columns_list; UInt64 num_rows = 0; @@ -308,14 +304,16 @@ namespace DB const auto decimal_type = static_cast(arrow_column->type().get()); internal_nested_type = std::make_shared>(decimal_type->precision(), decimal_type->scale()); - } else if (arrow_type_to_internal_type.find(arrow_type) != arrow_type_to_internal_type.end()) + } + else if (auto internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(), + [=](auto && elem) { return elem.first == arrow_type; }); + internal_type_it != arrow_type_to_internal_type.end()) { - internal_nested_type = arrow_type_to_internal_type.at(arrow_type); + internal_nested_type = DataTypeFactory::instance().get(internal_type_it->second); } else { - throw Exception - { + throw Exception{ "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() + "\" is not supported for conversion from a " + format_name + " data format", ErrorCodes::CANNOT_CONVERT_TYPE}; From cf57a884952b4103ff5f05a5671883bdf53c849c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 20:13:29 +0300 Subject: [PATCH 03/12] Added a way to forbid static initialization of a class --- dbms/programs/main.cpp | 12 ++++++++++++ libs/libcommon/src/DateLUTImpl.cpp | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 3fbbcee0f15..760eae4298b 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -21,6 +21,7 @@ #include #include +#include /// Universal executable for various clickhouse applications @@ -130,8 +131,19 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) } +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + + int main(int argc_, char ** argv_) { + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. std::set_new_handler(nullptr); diff --git a/libs/libcommon/src/DateLUTImpl.cpp b/libs/libcommon/src/DateLUTImpl.cpp index 3f812accb48..d6179373b4b 100644 --- a/libs/libcommon/src/DateLUTImpl.cpp +++ b/libs/libcommon/src/DateLUTImpl.cpp @@ -44,9 +44,16 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) } +__attribute__((__weak__)) extern bool inside_main; + DateLUTImpl::DateLUTImpl(const std::string & time_zone_) : time_zone(time_zone_) { + /// DateLUT should not be initialized in global constructors for the following reasons: + /// 1. It is too heavy. + if (&inside_main) + assert(inside_main); + size_t i = 0; time_t start_of_day = DATE_LUT_MIN; From 509717dea79ca0327303fad4524decff595e8a92 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 20:18:19 +0300 Subject: [PATCH 04/12] Added integration test --- .../test_timezone_config/__init__.py | 0 .../test_timezone_config/configs/config.xml | 4 ++++ .../integration/test_timezone_config/test.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+) create mode 100644 dbms/tests/integration/test_timezone_config/__init__.py create mode 100644 dbms/tests/integration/test_timezone_config/configs/config.xml create mode 100644 dbms/tests/integration/test_timezone_config/test.py diff --git a/dbms/tests/integration/test_timezone_config/__init__.py b/dbms/tests/integration/test_timezone_config/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_timezone_config/configs/config.xml b/dbms/tests/integration/test_timezone_config/configs/config.xml new file mode 100644 index 00000000000..46c16a7688d --- /dev/null +++ b/dbms/tests/integration/test_timezone_config/configs/config.xml @@ -0,0 +1,4 @@ + + + America/Los_Angeles + diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py new file mode 100644 index 00000000000..69f388bd459 --- /dev/null +++ b/dbms/tests/integration/test_timezone_config/test.py @@ -0,0 +1,17 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=['configs/config.xml']) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_check_client_logs_level(start_cluster): + assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n") From eb15c9416aa2659328f2986d50c212bbe1c4372c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 21:00:40 +0300 Subject: [PATCH 05/12] Fixed style --- dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 0aed11a48c9..0cd5ffb03e0 100644 --- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -313,8 +313,7 @@ namespace DB } else { - throw Exception{ - "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() + throw Exception{"The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() + "\" is not supported for conversion from a " + format_name + " data format", ErrorCodes::CANNOT_CONVERT_TYPE}; } From 216b05b71b67b14171db477782b570eee16580ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Aug 2019 21:33:24 +0300 Subject: [PATCH 06/12] Fixed build --- libs/libcommon/src/DateLUTImpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libcommon/src/DateLUTImpl.cpp b/libs/libcommon/src/DateLUTImpl.cpp index d6179373b4b..51f5ceb759c 100644 --- a/libs/libcommon/src/DateLUTImpl.cpp +++ b/libs/libcommon/src/DateLUTImpl.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #define DATE_LUT_MIN 0 From 3fdcc4ab308632e0e0eaefd38727e30680c0d2a4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Aug 2019 16:14:20 +0300 Subject: [PATCH 07/12] Fixed typo in README.md --- dbms/tests/integration/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 06819af7668..f608a10deaf 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -34,7 +34,7 @@ set the following environment variables: ### Running with runner script -The only requirement is fresh docker configured docker. +The only requirement is fresh configured docker. Notes: * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration. From 48bf4abdf0b28c57a10ec44ce1492f7828d9612c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Aug 2019 16:14:47 +0300 Subject: [PATCH 08/12] Fixed function name --- dbms/tests/integration/test_timezone_config/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py index 69f388bd459..572c1f17255 100644 --- a/dbms/tests/integration/test_timezone_config/test.py +++ b/dbms/tests/integration/test_timezone_config/test.py @@ -13,5 +13,5 @@ def start_cluster(): finally: cluster.shutdown() -def test_check_client_logs_level(start_cluster): +def test_check_timezone_config(start_cluster): assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n") From da5e7f31c4eb82a0f55624ae7d62652fda6e62f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Aug 2019 16:25:34 +0300 Subject: [PATCH 09/12] Updated README --- dbms/tests/integration/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index f608a10deaf..a0a5322e5a6 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -34,7 +34,8 @@ set the following environment variables: ### Running with runner script -The only requirement is fresh configured docker. +The only requirement is fresh configured docker and +docker pull yandex/clickhouse-integration-tests-runner Notes: * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration. From 4cb640717b935dee4e1d5e877f9ad0c7f25b49b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Aug 2019 16:28:27 +0300 Subject: [PATCH 10/12] Fixed test --- dbms/tests/integration/test_timezone_config/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_timezone_config/test.py b/dbms/tests/integration/test_timezone_config/test.py index 572c1f17255..22e11daa72e 100644 --- a/dbms/tests/integration/test_timezone_config/test.py +++ b/dbms/tests/integration/test_timezone_config/test.py @@ -14,4 +14,4 @@ def start_cluster(): cluster.shutdown() def test_check_timezone_config(start_cluster): - assert TSV(instance.query("SELECT toDateTime(1111111111)")) == TSV("2005-03-17 17:58:31\n") + assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n" From 89e86e1d41734f3612524c54422dc0e1cbb1bd9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Sep 2019 22:57:09 +0300 Subject: [PATCH 11/12] Slightly better config management in integration tests --- ...common_instance_config.xml => 0_common_instance_config.xml} | 0 dbms/tests/integration/helpers/cluster.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename dbms/tests/integration/helpers/{common_instance_config.xml => 0_common_instance_config.xml} (100%) diff --git a/dbms/tests/integration/helpers/common_instance_config.xml b/dbms/tests/integration/helpers/0_common_instance_config.xml similarity index 100% rename from dbms/tests/integration/helpers/common_instance_config.xml rename to dbms/tests/integration/helpers/0_common_instance_config.xml diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 1288aaa23f2..aadd2e70a52 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -723,7 +723,8 @@ class ClickHouseInstance: os.mkdir(config_d_dir) os.mkdir(users_d_dir) - shutil.copy(p.join(HELPERS_DIR, 'common_instance_config.xml'), config_d_dir) + # The file is named with 0_ prefix to be processed before other configuration overloads. + shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), config_d_dir) # Generate and write macros file macros = self.macros.copy() From 1903053aa1a848134ad8efc5e16429279430ccb8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Sep 2019 22:58:48 +0300 Subject: [PATCH 12/12] Fixed error in test --- dbms/tests/integration/test_timezone_config/configs/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_timezone_config/configs/config.xml b/dbms/tests/integration/test_timezone_config/configs/config.xml index 46c16a7688d..c601a1d09ef 100644 --- a/dbms/tests/integration/test_timezone_config/configs/config.xml +++ b/dbms/tests/integration/test_timezone_config/configs/config.xml @@ -1,4 +1,4 @@ - America/Los_Angeles + America/Los_Angeles