From 9880c628393ae52d82d774ce5f88f5f0f13c2b7b Mon Sep 17 00:00:00 2001 From: Evgeniy Gatov Date: Wed, 22 Apr 2015 20:56:27 +0300 Subject: [PATCH 01/21] libzkutil: tiny [#METR-13470]. --- libs/libzkutil/include/zkutil/Increment.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/libzkutil/include/zkutil/Increment.h b/libs/libzkutil/include/zkutil/Increment.h index 5cd8f02316c..37b698d0290 100644 --- a/libs/libzkutil/include/zkutil/Increment.h +++ b/libs/libzkutil/include/zkutil/Increment.h @@ -2,13 +2,13 @@ #include -namespace zkutil +namespace zkutil { class Increment { public: - Increment(ZooKeeperPtr zk_, const std::string & path_) + Increment(ZooKeeperPtr zk_, const std::string & path_) : zk(zk_), path(path_) { zk->createAncestors(path); @@ -34,8 +34,8 @@ public: { success = zk->tryCreate(path, std::to_string(result), zkutil::CreateMode::Persistent) == ZOK; } - } - while(!success); + } + while (!success); return result; } @@ -45,4 +45,4 @@ private: Logger * log = &Logger::get("zkutil::Increment"); }; -} \ No newline at end of file +} From 4fca014e1b4442bba05cdd663379d53538a79555 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 9 Jun 2015 19:12:51 +0300 Subject: [PATCH 02/21] dbms: reload initially failed dictionaries with exponential backoff [#METR-16702] --- .../include/DB/Dictionaries/CacheDictionary.h | 2 + dbms/include/DB/Dictionaries/FlatDictionary.h | 18 ++- .../DB/Dictionaries/HashedDictionary.h | 17 ++- dbms/include/DB/Dictionaries/IDictionary.h | 2 + .../DB/Interpreters/ExternalDictionaries.h | 14 +- .../src/Interpreters/ExternalDictionaries.cpp | 138 ++++++++++++++---- 6 files changed, 155 insertions(+), 36 deletions(-) diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index dcf89bc7d0b..766b4b4fd5a 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -42,6 +42,8 @@ public: : CacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} {} + std::exception_ptr getCreationException() const override { return {}; } + std::string getName() const override { return name; } std::string getTypeName() const override { return "Cache"; } diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 43629049dd4..629fe2ddd2e 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -25,8 +25,17 @@ public: source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) { createAttributes(); - loadData(); - calculateBytesAllocated(); + + try + { + loadData(); + calculateBytesAllocated(); + } + catch (...) + { + creation_exception = std::current_exception(); + } + creation_time = std::chrono::system_clock::now(); } @@ -34,6 +43,8 @@ public: : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} {} + std::exception_ptr getCreationException() const override { return creation_exception; } + std::string getName() const override { return name; } std::string getTypeName() const override { return "Flat"; } @@ -398,10 +409,11 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; + mutable std::atomic query_count; std::chrono::time_point creation_time; - mutable std::atomic query_count; + std::exception_ptr creation_exception; }; } diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 3eb29c7e8e6..d07a67ae7ed 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -22,8 +22,17 @@ public: source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) { createAttributes(); - loadData(); - calculateBytesAllocated(); + + try + { + loadData(); + calculateBytesAllocated(); + } + catch (...) + { + creation_exception = std::current_exception(); + } + creation_time = std::chrono::system_clock::now(); } @@ -31,6 +40,8 @@ public: : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} {} + std::exception_ptr getCreationException() const override { return creation_exception; } + std::string getName() const override { return name; } std::string getTypeName() const override { return "Hashed"; } @@ -389,6 +400,8 @@ private: mutable std::atomic query_count{}; std::chrono::time_point creation_time; + + std::exception_ptr creation_exception; }; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 2981c46a816..446e4fe593f 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -24,6 +24,8 @@ class IDictionary public: using id_t = std::uint64_t; + virtual std::exception_ptr getCreationException() const = 0; + virtual std::string getName() const = 0; virtual std::string getTypeName() const = 0; diff --git a/dbms/include/DB/Interpreters/ExternalDictionaries.h b/dbms/include/DB/Interpreters/ExternalDictionaries.h index 4bf27d8bbee..56464a631dc 100644 --- a/dbms/include/DB/Interpreters/ExternalDictionaries.h +++ b/dbms/include/DB/Interpreters/ExternalDictionaries.h @@ -1,23 +1,23 @@ #pragma once +#include #include #include #include #include #include +#include #include #include #include #include #include #include -#include namespace DB { class Context; -class IDictionary; /** Manages user-defined dictionaries. * Monitors configuration file and automatically reloads dictionaries in a separate thread. @@ -50,8 +50,16 @@ private: std::exception_ptr exception; }; + struct failed_dictionary_info final + { + std::unique_ptr dict; + std::chrono::system_clock::time_point next_attempt_time; + std::uint64_t error_count; + }; + std::unordered_map dictionaries; std::unordered_map update_times; + std::unordered_map failed_dictionaries; std::mt19937_64 rnd_engine{getSeed()}; Context & context; @@ -81,7 +89,7 @@ private: { timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); - return ts.tv_nsec ^ getpid(); + return static_cast(ts.tv_nsec ^ getpid()); } public: diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 8d62fb6aa3f..2f2d8e2cb32 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -6,6 +6,15 @@ #include #include + +namespace +{ + /// 5 seconds + const auto backoff_initial_sec = 5; + /// 10 minutes + const auto backoff_max_sec = 10 * 60; +} + namespace DB { @@ -41,6 +50,63 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) for (const auto & config_path : config_paths) reloadFromFile(config_path, throw_on_error); + /// list of recreated dictionaries to perform delayed removal from unordered_map + std::list recreated_failed_dictionaries; + + /// retry loading failed dictionaries + for (auto & failed_dictionary : failed_dictionaries) + { + if (std::chrono::system_clock::now() < failed_dictionary.second.next_attempt_time) + continue; + + const auto & name = failed_dictionary.first; + + try + { + auto dict_ptr = failed_dictionary.second.dict->clone(); + if (dict_ptr->getCreationException()) + { + /// recalculate next attempt time + std::uniform_int_distribution distribution( + 0, std::exp2(failed_dictionary.second.error_count)); + + failed_dictionary.second.next_attempt_time = std::chrono::system_clock::now() + + std::chrono::seconds{ + std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine)) + }; + + ++failed_dictionary.second.error_count; + } + else + { + const std::lock_guard lock{dictionaries_mutex}; + + const auto dict_it = dictionaries.find(name); + if (dict_it->second.dict) + dict_it->second.dict->set(dict_ptr.release()); + else + dict_it->second.dict = std::make_shared>(dict_ptr.release()); + + /// erase stored exception on success + dict_it->second.exception = std::exception_ptr{}; + + const auto & lifetime = dict_ptr->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_times[name] = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + + recreated_failed_dictionaries.push_back(name); + } + } + catch (...) + { + LOG_ERROR(log, "Failed reloading " << name << " dictionary due to unexpected error"); + } + } + + /// do not undertake further attempts to recreate these dictionaries + for (const auto & name : recreated_failed_dictionaries) + failed_dictionaries.erase(name); + /// periodic update for (auto & dictionary : dictionaries) { @@ -122,10 +188,10 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const } else { - auto it = last_modification_times.find(config_path); - if (it == std::end(last_modification_times)) - it = last_modification_times.emplace(config_path, Poco::Timestamp{0}).first; - auto & config_last_modified = it->second; + auto modification_time_it = last_modification_times.find(config_path); + if (modification_time_it == std::end(last_modification_times)) + modification_time_it = last_modification_times.emplace(config_path, Poco::Timestamp{0}).first; + auto & config_last_modified = modification_time_it->second; const auto last_modified = config_file.getLastModified(); if (last_modified > config_last_modified) @@ -163,12 +229,31 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const continue; } - auto it = dictionaries.find(name); - if (it != std::end(dictionaries)) - if (it->second.origin != config_path) - throw std::runtime_error{"Overriding dictionary from file " + it->second.origin}; + const auto dict_it = dictionaries.find(name); + if (dict_it != std::end(dictionaries)) + if (dict_it->second.origin != config_path) + throw std::runtime_error{"Overriding dictionary from file " + dict_it->second.origin}; auto dict_ptr = DictionaryFactory::instance().create(name, *config, key, context); + if (const auto exception_ptr = dict_ptr->getCreationException()) + { + const auto failed_dict_it = failed_dictionaries.find(name); + if (failed_dict_it != std::end(failed_dictionaries)) + { + failed_dict_it->second = failed_dictionary_info{ + std::move(dict_ptr), + std::chrono::system_clock::now() + std::chrono::seconds{backoff_initial_sec} + }; + } + else + failed_dictionaries.emplace(name, failed_dictionary_info{ + std::move(dict_ptr), + std::chrono::system_clock::now() + std::chrono::seconds{backoff_initial_sec} + }); + + std::rethrow_exception(exception_ptr); + } + if (!dict_ptr->isCached()) { const auto & lifetime = dict_ptr->getLifetime(); @@ -183,42 +268,38 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const } } + const std::lock_guard lock{dictionaries_mutex}; + /// add new dictionary or update an existing version - if (it == std::end(dictionaries)) - { - const std::lock_guard lock{dictionaries_mutex}; + if (dict_it == std::end(dictionaries)) dictionaries.emplace(name, dictionary_info{ std::make_shared>(dict_ptr.release()), config_path }); - } else { - if (it->second.dict) - it->second.dict->set(dict_ptr.release()); + if (dict_it->second.dict) + dict_it->second.dict->set(dict_ptr.release()); else - { - const std::lock_guard lock{dictionaries_mutex}; - it->second.dict = std::make_shared>(dict_ptr.release()); - } + dict_it->second.dict = std::make_shared>(dict_ptr.release()); /// erase stored exception on success - it->second.exception = std::exception_ptr{}; + dict_it->second.exception = std::exception_ptr{}; + failed_dictionaries.erase(name); } } catch (...) { if (!name.empty()) { + const std::lock_guard lock{dictionaries_mutex}; + const auto exception_ptr = std::current_exception(); - const auto it = dictionaries.find(name); - if (it == std::end(dictionaries)) - { - const std::lock_guard lock{dictionaries_mutex}; + const auto dict_it = dictionaries.find(name); + if (dict_it == std::end(dictionaries)) dictionaries.emplace(name, dictionary_info{nullptr, config_path, exception_ptr}); - } else - it->second.exception = exception_ptr; + dict_it->second.exception = exception_ptr; } try @@ -253,16 +334,17 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const MultiVersion::Version ExternalDictionaries::getDictionary(const std::string & name) const { const std::lock_guard lock{dictionaries_mutex}; - const auto it = dictionaries.find(name); + const auto it = dictionaries.find(name); if (it == std::end(dictionaries)) throw Exception{ "No such dictionary: " + name, ErrorCodes::BAD_ARGUMENTS }; - if (!it->second.dict && it->second.exception) - std::rethrow_exception(it->second.exception); + if (!it->second.dict) + it->second.exception ? std::rethrow_exception(it->second.exception) : + throw Exception{"No dictionary", ErrorCodes::LOGICAL_ERROR}; return it->second.dict->get(); } From b64f094d842c588595c0f5e9e208c8ce748d69e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2015 21:58:18 +0300 Subject: [PATCH 03/21] dbms: tiny improvement [#METR-2944]. --- dbms/include/DB/DataTypes/DataTypeAggregateFunction.h | 2 +- dbms/include/DB/DataTypes/DataTypeArray.h | 2 +- dbms/include/DB/DataTypes/DataTypeDate.h | 2 +- dbms/include/DB/DataTypes/DataTypeDateTime.h | 2 +- dbms/include/DB/DataTypes/DataTypeExpression.h | 2 +- dbms/include/DB/DataTypes/DataTypeFixedString.h | 2 +- dbms/include/DB/DataTypes/DataTypeNested.h | 2 +- dbms/include/DB/DataTypes/DataTypeSet.h | 2 +- dbms/include/DB/DataTypes/DataTypeString.h | 2 +- dbms/include/DB/DataTypes/DataTypeTuple.h | 2 +- dbms/include/DB/DataTypes/DataTypesNumberFixed.h | 4 ++-- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h b/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h index 07ee9f432ac..30742f5946f 100644 --- a/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h @@ -13,7 +13,7 @@ using Poco::SharedPtr; /** Тип - состояние агрегатной функции. * Параметры типа - это агрегатная функция, типы её аргументов и её параметры (для параметрических агрегатных функций). */ -class DataTypeAggregateFunction : public IDataType +class DataTypeAggregateFunction final : public IDataType { private: AggregateFunctionPtr function; diff --git a/dbms/include/DB/DataTypes/DataTypeArray.h b/dbms/include/DB/DataTypes/DataTypeArray.h index de111073468..8dbf99df3da 100644 --- a/dbms/include/DB/DataTypes/DataTypeArray.h +++ b/dbms/include/DB/DataTypes/DataTypeArray.h @@ -9,7 +9,7 @@ namespace DB using Poco::SharedPtr; -class DataTypeArray : public IDataType +class DataTypeArray final : public IDataType { private: /// Тип элементов массивов. diff --git a/dbms/include/DB/DataTypes/DataTypeDate.h b/dbms/include/DB/DataTypes/DataTypeDate.h index fadb19b5a5f..52830bc4ce3 100644 --- a/dbms/include/DB/DataTypes/DataTypeDate.h +++ b/dbms/include/DB/DataTypes/DataTypeDate.h @@ -10,7 +10,7 @@ namespace DB { -class DataTypeDate : public IDataTypeNumberFixed +class DataTypeDate final : public IDataTypeNumberFixed { public: DataTypeDate() {} diff --git a/dbms/include/DB/DataTypes/DataTypeDateTime.h b/dbms/include/DB/DataTypes/DataTypeDateTime.h index 1153173fc45..045ae089a00 100644 --- a/dbms/include/DB/DataTypes/DataTypeDateTime.h +++ b/dbms/include/DB/DataTypes/DataTypeDateTime.h @@ -10,7 +10,7 @@ namespace DB { -class DataTypeDateTime : public IDataTypeNumberFixed +class DataTypeDateTime final : public IDataTypeNumberFixed { public: DataTypeDateTime() {} diff --git a/dbms/include/DB/DataTypes/DataTypeExpression.h b/dbms/include/DB/DataTypes/DataTypeExpression.h index abd23e04523..262f2a99efa 100644 --- a/dbms/include/DB/DataTypes/DataTypeExpression.h +++ b/dbms/include/DB/DataTypes/DataTypeExpression.h @@ -9,7 +9,7 @@ namespace DB /** * Лямбда-выражение. */ -class DataTypeExpression : public IDataTypeDummy +class DataTypeExpression final : public IDataTypeDummy { private: DataTypes argument_types; diff --git a/dbms/include/DB/DataTypes/DataTypeFixedString.h b/dbms/include/DB/DataTypes/DataTypeFixedString.h index faffb0353fa..af2543785ac 100644 --- a/dbms/include/DB/DataTypes/DataTypeFixedString.h +++ b/dbms/include/DB/DataTypes/DataTypeFixedString.h @@ -13,7 +13,7 @@ namespace DB using Poco::SharedPtr; -class DataTypeFixedString : public IDataType +class DataTypeFixedString final : public IDataType { private: size_t n; diff --git a/dbms/include/DB/DataTypes/DataTypeNested.h b/dbms/include/DB/DataTypes/DataTypeNested.h index d6092574762..de5aa81c43f 100644 --- a/dbms/include/DB/DataTypes/DataTypeNested.h +++ b/dbms/include/DB/DataTypes/DataTypeNested.h @@ -9,7 +9,7 @@ namespace DB using Poco::SharedPtr; -class DataTypeNested : public IDataType +class DataTypeNested final : public IDataType { private: /// Имена и типы вложенных массивов. diff --git a/dbms/include/DB/DataTypes/DataTypeSet.h b/dbms/include/DB/DataTypes/DataTypeSet.h index df62e806cd8..1486979648b 100644 --- a/dbms/include/DB/DataTypes/DataTypeSet.h +++ b/dbms/include/DB/DataTypes/DataTypeSet.h @@ -9,7 +9,7 @@ namespace DB /** Тип данных, соответствующий множеству значений в секции IN. * Используется только как промежуточный вариант при вычислении выражений. */ -class DataTypeSet : public IDataTypeDummy +class DataTypeSet final : public IDataTypeDummy { public: std::string getName() const { return "Set"; } diff --git a/dbms/include/DB/DataTypes/DataTypeString.h b/dbms/include/DB/DataTypes/DataTypeString.h index 6804236d7b7..b474a7e6178 100644 --- a/dbms/include/DB/DataTypes/DataTypeString.h +++ b/dbms/include/DB/DataTypes/DataTypeString.h @@ -13,7 +13,7 @@ namespace DB using Poco::SharedPtr; -class DataTypeString : public IDataType +class DataTypeString final : public IDataType { public: using FieldType = String; diff --git a/dbms/include/DB/DataTypes/DataTypeTuple.h b/dbms/include/DB/DataTypes/DataTypeTuple.h index 7932d6853fc..5b0b45db805 100644 --- a/dbms/include/DB/DataTypes/DataTypeTuple.h +++ b/dbms/include/DB/DataTypes/DataTypeTuple.h @@ -15,7 +15,7 @@ namespace DB * Также может быть использовать в качестве столбца - результата выполнения запроса. * Не может быть сохранён в таблицы. */ -class DataTypeTuple : public IDataType +class DataTypeTuple final : public IDataType { private: DataTypes elems; diff --git a/dbms/include/DB/DataTypes/DataTypesNumberFixed.h b/dbms/include/DB/DataTypes/DataTypesNumberFixed.h index e3fbd275579..3ea76b2d920 100644 --- a/dbms/include/DB/DataTypes/DataTypesNumberFixed.h +++ b/dbms/include/DB/DataTypes/DataTypesNumberFixed.h @@ -13,11 +13,11 @@ template struct DataTypeFromFieldType; #define DEFINE_DATA_TYPE_NUMBER_FIXED(TYPE) \ - class DataType ## TYPE : public IDataTypeNumberFixed \ + class DataType ## TYPE final : public IDataTypeNumberFixed \ { \ public: \ std::string getName() const { return #TYPE; } \ - DataTypePtr clone() const { return new DataType ## TYPE; } \ + DataTypePtr clone() const { return new DataType ## TYPE; } \ }; \ \ template <> struct DataTypeFromFieldType \ From 18a8e1f0ca9ffb6395b075cbf3e4aeb431bcab07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2015 22:43:06 +0300 Subject: [PATCH 04/21] dbms: StorageBuffer: added missing support for defaults [#METR-16722]. --- dbms/include/DB/Storages/StorageBuffer.h | 12 ++++++++++-- dbms/src/Storages/StorageBuffer.cpp | 18 ++++++++++++++---- dbms/src/Storages/StorageFactory.cpp | 4 +++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/dbms/include/DB/Storages/StorageBuffer.h b/dbms/include/DB/Storages/StorageBuffer.h index 9efb458b858..57098478f89 100644 --- a/dbms/include/DB/Storages/StorageBuffer.h +++ b/dbms/include/DB/Storages/StorageBuffer.h @@ -48,7 +48,11 @@ public: /** num_shards - уровень внутреннего параллелизма (количество независимых буферов) * Буфер сбрасывается, если превышены все минимальные пороги или хотя бы один из максимальных. */ - static StoragePtr create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + static StoragePtr create(const std::string & name_, NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + Context & context_, size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, const String & destination_database_, const String & destination_table_); @@ -113,7 +117,11 @@ private: /// Выполняет сброс данных по таймауту. std::thread flush_thread; - StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + Context & context_, size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, const String & destination_database_, const String & destination_table_); diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index ba902d27ca4..2ed9176f2e2 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -13,19 +13,29 @@ namespace DB { -StoragePtr StorageBuffer::create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, +StoragePtr StorageBuffer::create(const std::string & name_, NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + Context & context_, size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, const String & destination_database_, const String & destination_table_) { return (new StorageBuffer{ - name_, columns_, context_, num_shards_, min_thresholds_, max_thresholds_, destination_database_, destination_table_})->thisPtr(); + name_, columns_, materialized_columns_, alias_columns_, column_defaults_, + context_, num_shards_, min_thresholds_, max_thresholds_, destination_database_, destination_table_})->thisPtr(); } -StorageBuffer::StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, +StorageBuffer::StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + Context & context_, size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, const String & destination_database_, const String & destination_table_) - : name(name_), columns(columns_), context(context_), + : IStorage{materialized_columns_, alias_columns_, column_defaults_}, + name(name_), columns(columns_), context(context_), num_shards(num_shards_), buffers(num_shards_), min_thresholds(min_thresholds_), max_thresholds(max_thresholds_), destination_database(destination_database_), destination_table(destination_table_), diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 9251d2e87b9..da325c58589 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -373,7 +373,9 @@ StoragePtr StorageFactory::get( size_t max_bytes = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[8]).value); return StorageBuffer::create( - table_name, columns, context, + table_name, columns, + materialized_columns, alias_columns, column_defaults, + context, num_buckets, {min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes}, destination_database, destination_table); } From c65b2f6a166d3a7cc4b7ce22f637c904485a573f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2015 22:51:45 +0300 Subject: [PATCH 05/21] dbms: added test [#METR-16722]. --- .../0_stateless/00168_buffer_defaults.reference | 9 +++++++++ .../queries/0_stateless/00168_buffer_defaults.sql | 12 ++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00168_buffer_defaults.reference create mode 100644 dbms/tests/queries/0_stateless/00168_buffer_defaults.sql diff --git a/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference new file mode 100644 index 00000000000..3f70f0ee2c4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference @@ -0,0 +1,9 @@ +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) +2015-06-09 2015-06-09 01:02:03 2015-06-09 +2015-06-09 2015-06-09 01:02:03 2015-06-09 +2015-06-09 2015-06-09 01:02:03 2015-06-09 diff --git a/dbms/tests/queries/0_stateless/00168_buffer_defaults.sql b/dbms/tests/queries/0_stateless/00168_buffer_defaults.sql new file mode 100644 index 00000000000..a42105f6b12 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00168_buffer_defaults.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test.mt; +DROP TABLE IF EXISTS test.mt_buffer; +CREATE TABLE test.mt (EventDate Date, UTCEventTime DateTime, MoscowEventDate Date DEFAULT toDate(UTCEventTime)) ENGINE = MergeTree(EventDate, UTCEventTime, 8192); +CREATE TABLE test.mt_buffer AS test.mt ENGINE = Buffer(test, mt, 16, 10, 100, 10000, 1000000, 10000000, 100000000); +DESC TABLE test.mt; +DESC TABLE test.mt_buffer; +INSERT INTO test.mt (EventDate, UTCEventTime) VALUES ('2015-06-09', '2015-06-09 01:02:03'); +SELECT * FROM test.mt_buffer; +INSERT INTO test.mt_buffer (EventDate, UTCEventTime) VALUES ('2015-06-09', '2015-06-09 01:02:03'); +SELECT * FROM test.mt_buffer; +DROP TABLE test.mt_buffer; +DROP TABLE test.mt; From d1df9bd43b11c86e205a0b188d7e1f29d3a578ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2015 23:55:15 +0300 Subject: [PATCH 06/21] dbms: Join: added support for constant keys [#METR-16762]. --- dbms/src/Interpreters/Join.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 38fda859d68..b614c76fa57 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -365,10 +365,21 @@ bool Join::insertFromBlock(const Block & block) size_t keys_size = key_names_right.size(); ConstColumnPlainPtrs key_columns(keys_size); + /// Редкий случай, когда ключи являются константами. Чтобы не поддерживать отдельный код, материализуем их. + Columns materialized_columns; + /// Запоминаем столбцы ключей, с которыми будем работать for (size_t i = 0; i < keys_size; ++i) + { key_columns[i] = block.getByName(key_names_right[i]).column; + if (key_columns[i]->isConst()) + { + materialized_columns.emplace_back(dynamic_cast(*key_columns[i]).convertToFullColumn()); + key_columns[i] = materialized_columns.back(); + } + } + size_t rows = block.rows(); blocks.push_back(block); @@ -378,7 +389,7 @@ bool Join::insertFromBlock(const Block & block) for (const auto & name : key_names_right) stored_block->erase(stored_block->getPositionByName(name)); - /// Редкий случай, когда соединяемые столбцы являеются константами. Чтобы не поддерживать отдельный код, материализуем их. + /// Редкий случай, когда соединяемые столбцы являются константами. Чтобы не поддерживать отдельный код, материализуем их. for (size_t i = 0, size = stored_block->columns(); i < size; ++i) { ColumnPtr col = stored_block->getByPosition(i).column; @@ -515,10 +526,21 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const size_t keys_size = key_names_left.size(); ConstColumnPlainPtrs key_columns(keys_size); + /// Редкий случай, когда ключи являются константами. Чтобы не поддерживать отдельный код, материализуем их. + Columns materialized_columns; + /// Запоминаем столбцы ключей, с которыми будем работать for (size_t i = 0; i < keys_size; ++i) + { key_columns[i] = block.getByName(key_names_left[i]).column; + if (key_columns[i]->isConst()) + { + materialized_columns.emplace_back(dynamic_cast(*key_columns[i]).convertToFullColumn()); + key_columns[i] = materialized_columns.back(); + } + } + /// Добавляем в блок новые столбцы. size_t num_columns_to_add = sample_block.columns(); ColumnPlainPtrs added_columns(num_columns_to_add); From b417ef015fcaa9e407c292cf02984a6c1abd86db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2015 23:56:40 +0300 Subject: [PATCH 07/21] dbms: added test [#METR-16762]. --- .../00169_join_constant_keys.reference | 3 +++ .../0_stateless/00169_join_constant_keys.sql | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00169_join_constant_keys.reference create mode 100644 dbms/tests/queries/0_stateless/00169_join_constant_keys.sql diff --git a/dbms/tests/queries/0_stateless/00169_join_constant_keys.reference b/dbms/tests/queries/0_stateless/00169_join_constant_keys.reference new file mode 100644 index 00000000000..f957808e94b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00169_join_constant_keys.reference @@ -0,0 +1,3 @@ +1 0 999 +2 0 999 +3 0 999 diff --git a/dbms/tests/queries/0_stateless/00169_join_constant_keys.sql b/dbms/tests/queries/0_stateless/00169_join_constant_keys.sql new file mode 100644 index 00000000000..93c98271ca7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00169_join_constant_keys.sql @@ -0,0 +1,17 @@ +SELECT + key1, + key2, + table_1 +FROM +( + SELECT + arrayJoin([1, 2, 3]) AS key1, + 0 AS key2, + 999 AS table_1 +) ALL INNER JOIN +( + SELECT + arrayJoin([1, 3, 2]) AS key1, + 0 AS key2, + 999 AS table_1 +) USING key2, key1; From 161d30c9023912acd42634cb7ec5599e2979d336 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2015 00:34:45 +0300 Subject: [PATCH 08/21] dbms: adding checking of types when JOIN (incomplete) [#METR-16762]. --- dbms/include/DB/Interpreters/Join.h | 3 +++ dbms/src/Interpreters/Join.cpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/dbms/include/DB/Interpreters/Join.h b/dbms/include/DB/Interpreters/Join.h index 034390e8492..ca9983348d5 100644 --- a/dbms/include/DB/Interpreters/Join.h +++ b/dbms/include/DB/Interpreters/Join.h @@ -250,6 +250,9 @@ private: /// Проверить не превышены ли допустимые размеры множества bool checkSizeLimits() const; + + /// Кинуть исключение, если в блоках не совпадают типы ключей. + void checkTypesOfKeys(const Block & block_left, const Block & block_right) const; }; typedef Poco::SharedPtr JoinPtr; diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index b614c76fa57..3a3c84b19a8 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -650,10 +650,25 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const } +void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const +{ + size_t keys_size = key_names_left.size(); + + for (size_t i = 0; i < keys_size; ++i) + if (block_left.getByName(key_names_left[i]).type->getName() != block_right.getByName(key_names_right[i]).type->getName()) + throw Exception("Type mismatch of columns to JOIN by: " + + key_names_left[i] + " " + block_left.getByName(key_names_left[i]).type->getName() + " at left, " + + key_names_right[i] + " " + block_right.getByName(key_names_right[i]).type->getName() + " at right, ", + ErrorCodes::TYPE_MISMATCH); +} + + void Join::joinBlock(Block & block) const { Poco::ScopedReadRWLock lock(rwlock); +// checkTypesOfKeys(block, sample_block); + if (kind == ASTJoin::Left && strictness == ASTJoin::Any) joinBlockImpl(block, maps_any); else if (kind == ASTJoin::Inner && strictness == ASTJoin::Any) From 9237e308fbbf8b8185111ea5ff77331a5c0cc2d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2015 02:50:22 +0300 Subject: [PATCH 09/21] dbms: JOIN: added checking of types of key columns [#METR-16762]. --- dbms/include/DB/Interpreters/Join.h | 3 ++- dbms/src/Interpreters/Join.cpp | 30 ++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/dbms/include/DB/Interpreters/Join.h b/dbms/include/DB/Interpreters/Join.h index ca9983348d5..9a413e28cf9 100644 --- a/dbms/include/DB/Interpreters/Join.h +++ b/dbms/include/DB/Interpreters/Join.h @@ -222,7 +222,8 @@ private: bool keys_fit_128_bits; Sizes key_sizes; - Block sample_block; + Block sample_block_with_columns_to_add; + Block sample_block_with_keys; Logger * log; diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3a3c84b19a8..ce9479e2f91 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -339,15 +339,19 @@ void Join::setSampleBlock(const Block & block) /// Выберем, какую структуру данных для множества использовать. init(chooseMethod(key_columns, keys_fit_128_bits, key_sizes)); - sample_block = block; + sample_block_with_columns_to_add = block; - /// Удаляем из sample_block ключевые столбцы, так как они не нужны. + /// Удаляем из sample_block_with_columns_to_add ключевые столбцы. for (const auto & name : key_names_right) - sample_block.erase(sample_block.getPositionByName(name)); - - for (size_t i = 0, size = sample_block.columns(); i < size; ++i) { - auto & column = sample_block.unsafeGetByPosition(i); + size_t pos = sample_block_with_columns_to_add.getPositionByName(name); + sample_block_with_keys.insert(sample_block_with_columns_to_add.unsafeGetByPosition(pos)); + sample_block_with_columns_to_add.erase(pos); + } + + for (size_t i = 0, size = sample_block_with_columns_to_add.columns(); i < size; ++i) + { + auto & column = sample_block_with_columns_to_add.unsafeGetByPosition(i); if (!column.column) column.column = column.type->createColumn(); } @@ -542,14 +546,14 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const } /// Добавляем в блок новые столбцы. - size_t num_columns_to_add = sample_block.columns(); + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); ColumnPlainPtrs added_columns(num_columns_to_add); size_t existing_columns = block.columns(); for (size_t i = 0; i < num_columns_to_add; ++i) { - const ColumnWithNameAndType & src_column = sample_block.getByPosition(i); + const ColumnWithNameAndType & src_column = sample_block_with_columns_to_add.getByPosition(i); ColumnWithNameAndType new_column = src_column.cloneEmpty(); block.insert(new_column); added_columns[i] = new_column.column; @@ -658,7 +662,7 @@ void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) if (block_left.getByName(key_names_left[i]).type->getName() != block_right.getByName(key_names_right[i]).type->getName()) throw Exception("Type mismatch of columns to JOIN by: " + key_names_left[i] + " " + block_left.getByName(key_names_left[i]).type->getName() + " at left, " - + key_names_right[i] + " " + block_right.getByName(key_names_right[i]).type->getName() + " at right, ", + + key_names_right[i] + " " + block_right.getByName(key_names_right[i]).type->getName() + " at right", ErrorCodes::TYPE_MISMATCH); } @@ -667,7 +671,7 @@ void Join::joinBlock(Block & block) const { Poco::ScopedReadRWLock lock(rwlock); -// checkTypesOfKeys(block, sample_block); + checkTypesOfKeys(block, sample_block_with_keys); if (kind == ASTJoin::Left && strictness == ASTJoin::Any) joinBlockImpl(block, maps_any); @@ -703,7 +707,7 @@ void Join::joinTotals(Block & block) const else { /// Будем присоединять пустые totals - из одной строчки со значениями по-умолчанию. - totals_without_keys = sample_block.cloneEmpty(); + totals_without_keys = sample_block_with_columns_to_add.cloneEmpty(); for (size_t i = 0; i < totals_without_keys.columns(); ++i) { @@ -808,12 +812,12 @@ private: } /// Добавляем в блок новые столбцы. - size_t num_columns_right = parent.sample_block.columns(); + size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); ColumnPlainPtrs columns_right(num_columns_right); for (size_t i = 0; i < num_columns_right; ++i) { - const ColumnWithNameAndType & src_column = parent.sample_block.getByPosition(i); + const ColumnWithNameAndType & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); ColumnWithNameAndType new_column = src_column.cloneEmpty(); block.insert(new_column); columns_right[i] = new_column.column; From b2bfa55a37370dd23a24173a3759443e3273901e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2015 02:54:37 +0300 Subject: [PATCH 10/21] dbms: fixed tests [#METR-16762]. --- dbms/tests/queries/0_stateless/00118_storage_join.sql | 2 +- dbms/tests/queries/0_stateless/00119_storage_join.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00118_storage_join.sql b/dbms/tests/queries/0_stateless/00118_storage_join.sql index f52e2c7e5c8..7ad702e8ad4 100644 --- a/dbms/tests/queries/0_stateless/00118_storage_join.sql +++ b/dbms/tests/queries/0_stateless/00118_storage_join.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test.join; -CREATE TABLE test.join (k UInt8, s String) ENGINE = Join(ANY, LEFT, k); +CREATE TABLE test.join (k UInt64, s String) ENGINE = Join(ANY, LEFT, k); USE test; diff --git a/dbms/tests/queries/0_stateless/00119_storage_join.sql b/dbms/tests/queries/0_stateless/00119_storage_join.sql index 934d05f0f65..9a0ead94bd7 100644 --- a/dbms/tests/queries/0_stateless/00119_storage_join.sql +++ b/dbms/tests/queries/0_stateless/00119_storage_join.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test.join; -CREATE TABLE test.join (s String, x Array(UInt8), k UInt8) ENGINE = Join(ANY, LEFT, k); +CREATE TABLE test.join (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, k); USE test; From ea4f4420bac82f94689211fdb5e55fdc6680e314 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 10 Jun 2015 15:47:27 +0300 Subject: [PATCH 11/21] dbms: replace old lower/upper(UTF8) with vectorized ones [#METR-14764] --- dbms/include/DB/Functions/FunctionsString.h | 123 +++--------------- dbms/src/Functions/FunctionsString.cpp | 4 - .../00170_lower_upper_utf8.reference | 24 ++++ .../0_stateless/00170_lower_upper_utf8.sql | 29 +++++ 4 files changed, 68 insertions(+), 112 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00170_lower_upper_utf8.reference create mode 100644 dbms/tests/queries/0_stateless/00170_lower_upper_utf8.sql diff --git a/dbms/include/DB/Functions/FunctionsString.h b/dbms/include/DB/Functions/FunctionsString.h index 3025aa46942..4ab35f71ef8 100644 --- a/dbms/include/DB/Functions/FunctionsString.h +++ b/dbms/include/DB/Functions/FunctionsString.h @@ -202,46 +202,11 @@ struct LengthUTF8Impl }; -/** Переводит строку в нижний (верхний) регистр, в текущей локали, в однобайтовой кодировке. - */ -template +template struct LowerUpperImpl { static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets) - { - res_data.resize(data.size()); - res_offsets.assign(offsets); - array(&*data.begin(), &*data.end(), &*res_data.begin()); - } - - static void vector_fixed(const ColumnString::Chars_t & data, size_t n, - ColumnString::Chars_t & res_data) - { - res_data.resize(data.size()); - array(&*data.begin(), &*data.end(), &*res_data.begin()); - } - - static void constant(const std::string & data, std::string & res_data) - { - res_data.resize(data.size()); - array(reinterpret_cast(&*data.begin()), reinterpret_cast(&*data.end()), - reinterpret_cast(&*res_data.begin())); - } - -private: - static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) - { - for (; src < src_end; ++src, ++dst) - *dst = F(*src); - } -}; - -template -struct LowerUpperImplVectorized -{ - static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, - ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets) { res_data.resize(data.size()); res_offsets.assign(offsets); @@ -348,9 +313,14 @@ inline void UTF8CyrillicToCase(const UInt8 * & src, const UInt8 * const src_end, } }; +/** Если строка содержит текст в кодировке UTF-8 - перевести его в нижний (верхний) регистр. + * Замечание: предполагается, что после перевода символа в другой регистр, + * длина его мультибайтовой последовательности в UTF-8 не меняется. + * Иначе - поведение не определено. + */ template -struct LowerUpperUTF8ImplVectorized +struct LowerUpperUTF8Impl { static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets) @@ -487,59 +457,6 @@ private: }; -/** Если строка содержит текст в кодировке UTF-8 - перевести его в нижний (верхний) регистр. - * Замечание: предполагается, что после перевода символа в другой регистр, - * длина его мультибайтовой последовательности в UTF-8 не меняется. - * Иначе - поведение не определено. - */ -template -struct LowerUpperUTF8Impl -{ - static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, - ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets) - { - res_data.resize(data.size()); - res_offsets.assign(offsets); - array(&*data.begin(), &*data.end(), &*res_data.begin()); - } - - static void vector_fixed(const ColumnString::Chars_t & data, size_t n, - ColumnString::Chars_t & res_data) - { - res_data.resize(data.size()); - array(&*data.begin(), &*data.end(), &*res_data.begin()); - } - - static void constant(const std::string & data, std::string & res_data) - { - res_data.resize(data.size()); - array(reinterpret_cast(&*data.begin()), reinterpret_cast(&*data.end()), - reinterpret_cast(&*res_data.begin())); - } - -private: - static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) - { - static Poco::UTF8Encoding utf8; - - while (src < src_end) - { - int chars = utf8.convert(F(utf8.convert(src)), dst, src_end - src); - if (chars) - { - src += chars; - dst += chars; - } - else - { - ++src; - ++dst; - } - } - } -}; - - /** Разворачивает строку в байтах. */ struct ReverseImpl @@ -1676,32 +1593,22 @@ struct NameReverseUTF8 { static constexpr auto name = "reverseUTF8"; }; struct NameSubstring { static constexpr auto name = "substring"; }; struct NameSubstringUTF8 { static constexpr auto name = "substringUTF8"; }; -struct NameSSELower { static constexpr auto name = "sse_lower"; }; -struct NameSSEUpper { static constexpr auto name = "sse_upper"; }; -struct NameSSELowerUTF8 { static constexpr auto name = "sse_lowerUTF8"; }; -struct NameSSEUpperUTF8 { static constexpr auto name = "sse_upperUTF8"; }; - typedef FunctionStringOrArrayToT, NameEmpty, UInt8> FunctionEmpty; typedef FunctionStringOrArrayToT, NameNotEmpty, UInt8> FunctionNotEmpty; typedef FunctionStringOrArrayToT FunctionLength; typedef FunctionStringOrArrayToT FunctionLengthUTF8; -typedef FunctionStringToString, NameLower> FunctionLower; -typedef FunctionStringToString, NameUpper> FunctionUpper; -typedef FunctionStringToString, NameLowerUTF8> FunctionLowerUTF8; -typedef FunctionStringToString, NameUpperUTF8> FunctionUpperUTF8; +typedef FunctionStringToString, NameLower> FunctionLower; +typedef FunctionStringToString, NameUpper> FunctionUpper; +typedef FunctionStringToString< + LowerUpperUTF8Impl<'A', 'Z', Poco::Unicode::toLower, UTF8CyrillicToCase>, + NameLowerUTF8> FunctionLowerUTF8; +typedef FunctionStringToString< + LowerUpperUTF8Impl<'a', 'z', Poco::Unicode::toUpper, UTF8CyrillicToCase>, + NameUpperUTF8> FunctionUpperUTF8; typedef FunctionStringToString FunctionReverse; typedef FunctionStringToString FunctionReverseUTF8; typedef FunctionStringNumNumToString FunctionSubstring; typedef FunctionStringNumNumToString FunctionSubstringUTF8; -using FunctionSSELower = FunctionStringToString, NameSSELower>; -using FunctionSSEUpper = FunctionStringToString, NameSSEUpper>; -using FunctionSSELowerUTF8 = FunctionStringToString< - LowerUpperUTF8ImplVectorized<'A', 'Z', Poco::Unicode::toLower, UTF8CyrillicToCase>, - NameSSELowerUTF8>; -using FunctionSSEUpperUTF8 = FunctionStringToString< - LowerUpperUTF8ImplVectorized<'a', 'z', Poco::Unicode::toUpper, UTF8CyrillicToCase>, - NameSSEUpperUTF8>; - } diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index eadd6bc6884..17cda08cbc3 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -20,10 +20,6 @@ void registerFunctionsString(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); } } diff --git a/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.reference new file mode 100644 index 00000000000..f202cb75513 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -0,0 +1,24 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.sql new file mode 100644 index 00000000000..d3f1c6f6230 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -0,0 +1,29 @@ +select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; +select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; +select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; +select lowerUTF8('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; + +select upper('AAAAAAAAAAAAAAA012345789,.!AAAA' as str) = str; +select upperUTF8('AAAAAAAAAAAAAAA012345789,.!AAAA' as str) = str; +select upper('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'AAAAAAAAAAAAAAA012345789,.!AAAA'; +select upperUTF8('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'AAAAAAAAAAAAAAA012345789,.!AAAA'; + +select sum(lower(materialize('aaaaaaaaaaaaaaa012345789,.!aaaa') as str) = str) = count() array join range(16384) as n; +select sum(lowerUTF8(materialize('aaaaaaaaaaaaaaa012345789,.!aaaa') as str) = str) = count() array join range(16384) as n; +select sum(lower(materialize('AaAaAaAaAaAaAaA012345789,.!aAaA')) = materialize('aaaaaaaaaaaaaaa012345789,.!aaaa')) = count() array join range(16384) as n; +select sum(lowerUTF8(materialize('AaAaAaAaAaAaAaA012345789,.!aAaA')) = materialize('aaaaaaaaaaaaaaa012345789,.!aaaa')) = count() array join range(16384) as n; + +select sum(upper(materialize('AAAAAAAAAAAAAAA012345789,.!AAAA') as str) = str) = count() array join range(16384) as n; +select sum(upperUTF8(materialize('AAAAAAAAAAAAAAA012345789,.!AAAA') as str) = str) = count() array join range(16384) as n; +select sum(upper(materialize('AaAaAaAaAaAaAaA012345789,.!aAaA')) = materialize('AAAAAAAAAAAAAAA012345789,.!AAAA')) = count() array join range(16384) as n; +select sum(upperUTF8(materialize('AaAaAaAaAaAaAaA012345789,.!aAaA')) = materialize('AAAAAAAAAAAAAAA012345789,.!AAAA')) = count() array join range(16384) as n; + +select lower('aaaaАБВГAAAAaaAA') = 'aaaaАБВГaaaaaaaa'; +select upper('aaaaАБВГAAAAaaAA') = 'AAAAАБВГAAAAAAAA'; +select lowerUTF8('aaaaАБВГAAAAaaAA') = 'aaaaабвгaaaaaaaa'; +select upperUTF8('aaaaАБВГAAAAaaAA') = 'AAAAАБВГAAAAAAAA'; + +select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВГaaaaaaaa')) = count() array join range(16384) as n; +select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() array join range(16384) as n; +select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() array join range(16384) as n; +select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() array join range(16384) as n; From c4d1887a6fde50881381ed88b0718143b03f879b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 10 Jun 2015 17:49:38 +0300 Subject: [PATCH 12/21] dbms: correctly (I hope) determine PREWHERE dependencies for defaults [#METR-16589] --- .../include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h index da584b0febb..2d2b3230fc9 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h @@ -43,9 +43,7 @@ public: { pre_column_names = prewhere_actions->getRequiredColumns(); - /// @todo somehow decide which injected columns belong to PREWHERE, optimizing reads - pre_column_names.insert(std::end(pre_column_names), - std::begin(injected_columns), std::end(injected_columns)); + injectRequiredColumns(pre_column_names); if (pre_column_names.empty()) pre_column_names.push_back(column_names[0]); From bc17c73ee49d18d8b64e79faead97869fa6c22c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2015 22:56:57 +0300 Subject: [PATCH 13/21] SipHash: allowed not to call update for empty strings (less than 0.3% performance degradation on all test data) [#METR-16781]. --- dbms/include/DB/Common/SipHash.h | 1 + dbms/src/Common/tests/sip_hash_perf.cpp | 71 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 dbms/src/Common/tests/sip_hash_perf.cpp diff --git a/dbms/include/DB/Common/SipHash.h b/dbms/include/DB/Common/SipHash.h index bcfe81b93ed..280d83a9b84 100644 --- a/dbms/include/DB/Common/SipHash.h +++ b/dbms/include/DB/Common/SipHash.h @@ -76,6 +76,7 @@ public: v3 = 0x7465646279746573ULL ^ k1; cnt = 0; + current_word = 0; } void update(const char * data, u64 size) diff --git a/dbms/src/Common/tests/sip_hash_perf.cpp b/dbms/src/Common/tests/sip_hash_perf.cpp new file mode 100644 index 00000000000..dcd397ad5b9 --- /dev/null +++ b/dbms/src/Common/tests/sip_hash_perf.cpp @@ -0,0 +1,71 @@ +#include +#include +#include + +#include +#include +#include +#include + + +/** Тестировать так: + * + * clickhouse-client --query="SELECT SearchPhrase AS k FROM test.hits WHERE k != ''" > phrases.tsv + * clickhouse-client --query="SELECT URL AS k FROM test.hits" > urls.tsv + * clickhouse-client --query="SELECT SearchPhrase AS k FROM test.hits" > phrases_with_empty.tsv + * clickhouse-client --query="SELECT Title AS k FROM test.hits" > titles.tsv + * clickhouse-client --query="SELECT PageCharset AS k FROM test.hits" > charset.tsv + * + * for i in {1..1000}; do ./sip_hash_perf < titles.tsv 2>&1 | grep Processed | grep -oP '\d+\.\d+ rows/sec'; done | awk '{ if ($1 > x) { x = $1; print x } }' + */ + + +int main(int argc, char ** argv) +{ + std::vector data; + DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); + + std::cerr << std::fixed << std::setprecision(3); + + { + Stopwatch watch; + + while (!in.eof()) + { + data.emplace_back(); + DB::readEscapedString(data.back(), in); + DB::assertString("\n", in); + } + + double seconds = watch.elapsedSeconds(); + std::cerr << "Read " + << data.size() << " rows, " + << (in.count() / 1048576.0) << " MiB " + << " in " << seconds << " sec., " + << (data.size() / seconds) << " rows/sec., " + << (in.count() / 1048576.0 / seconds) << " MiB/sec.\n"; + } + + { + size_t res = 0; + Stopwatch watch; + + for (const auto & s : data) + { + SipHash hash; + hash.update(s.data(), s.size()); + res += hash.get64(); + } + + double seconds = watch.elapsedSeconds(); + std::cerr << "Processed " + << data.size() << " rows, " + << (in.count() / 1048576.0) << " MiB " + << " in " << seconds << " sec., " + << (data.size() / seconds) << " rows/sec., " + << (in.count() / 1048576.0 / seconds) << " MiB/sec. " + << "(res = " << res << ")\n"; + } + + return 0; +} From 7f0e6fd3c18ebf9dbb0f7a2d98deec21921b91fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2015 03:35:36 +0300 Subject: [PATCH 14/21] Merge --- .../include/DB/Storages/MergeTree/DiskSpaceMonitor.h | 5 +++-- .../DB/Storages/MergeTree/MergeTreeDataMerger.h | 2 +- dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 11 ++++++----- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++++++--- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/include/DB/Storages/MergeTree/DiskSpaceMonitor.h index 04a28e996c1..9513090ec7f 100644 --- a/dbms/include/DB/Storages/MergeTree/DiskSpaceMonitor.h +++ b/dbms/include/DB/Storages/MergeTree/DiskSpaceMonitor.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -116,8 +117,8 @@ public: { size_t free_bytes = getUnreservedFreeSpace(path); if (free_bytes < size) - throw Exception("Not enough free disk space to reserve: " + toString(free_bytes) + " available, " - + toString(size) + " requested", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception("Not enough free disk space to reserve: " + formatReadableSizeWithBinarySuffix(free_bytes) + " available, " + + formatReadableSizeWithBinarySuffix(size) + " requested", ErrorCodes::NOT_ENOUGH_SPACE); return new Reservation(size); } diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h index 59b3638da43..4e0619e3662 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h @@ -49,7 +49,7 @@ public: DiskSpaceMonitor::Reservation * disk_reservation = nullptr); /// Примерное количество места на диске, нужное для мерджа. С запасом. - size_t estimateDiskSpaceForMerge(const MergeTreeData::DataPartsVector & parts); + static size_t estimateDiskSpaceForMerge(const MergeTreeData::DataPartsVector & parts); /** Отменяет все мерджи. Все выполняющиеся сейчас вызовы mergeParts скоро бросят исключение. * Все новые вызовы будут бросать исключения, пока не будет вызван uncancelAll(). diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index fb667751464..d75573dd62b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -220,10 +220,12 @@ bool MergeTreeDataMerger::selectPartsToMerge(MergeTreeData::DataPartsVector & pa { disk_space_warning_time = now; LOG_WARNING(log, "Won't merge parts from " << first_part->name << " to " << last_part->name - << " because not enough free space: " << available_disk_space << " free and unreserved " - << "(" << DiskSpaceMonitor::getReservedSpace() << " reserved in " + << " because not enough free space: " + << formatReadableSizeWithBinarySuffix(available_disk_space) << " free and unreserved " + << "(" << formatReadableSizeWithBinarySuffix(DiskSpaceMonitor::getReservedSpace()) << " reserved in " << DiskSpaceMonitor::getReservationCount() << " chunks), " - << cur_sum << " required now (+" << static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100) + << formatReadableSizeWithBinarySuffix(cur_sum) + << " required now (+" << static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100) << "% on overhead); suppressing similar warnings for the next hour"); } break; @@ -461,9 +463,8 @@ size_t MergeTreeDataMerger::estimateDiskSpaceForMerge(const MergeTreeData::DataP { size_t res = 0; for (const MergeTreeData::DataPartPtr & part : parts) - { res += part->size_in_bytes; - } + return static_cast(res * DISK_USAGE_COEFFICIENT_TO_RESERVE); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f9e9f376fe2..2b0a872d7ce 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -229,7 +229,7 @@ bool StorageMergeTree::merge(size_t aio_threshold, bool aggressive, BackgroundPr return false; } - merging_tagger = new CurrentlyMergingPartsTagger(parts, merger.estimateDiskSpaceForMerge(parts), *this); + merging_tagger = new CurrentlyMergingPartsTagger(parts, MergeTreeDataMerger::estimateDiskSpaceForMerge(parts), *this); /// Если собираемся сливать большие куски, увеличим счетчик потоков, сливающих большие куски. if (pool_context) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e113fc298d3..1cd57a70216 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -860,12 +861,15 @@ bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry, Backgro } } + size_t sum_parts_size_in_bytes = MergeTreeDataMerger::estimateDiskSpaceForMerge(parts); + DiskSpaceMonitor::ReservationPtr reserved_space = DiskSpaceMonitor::reserve(full_path, sum_parts_size_in_bytes); /// Может бросить исключение. + auto table_lock = lockStructure(false); const auto & merge_entry = context.getMergeList().insert(database_name, table_name, entry.new_part_name); MergeTreeData::Transaction transaction; size_t aio_threshold = context.getSettings().min_bytes_to_use_direct_io; - MergeTreeData::DataPartPtr part = merger.mergeParts(parts, entry.new_part_name, *merge_entry, aio_threshold, &transaction); + MergeTreeData::DataPartPtr part = merger.mergeParts(parts, entry.new_part_name, *merge_entry, aio_threshold, &transaction, reserved_space); zkutil::Ops ops; checkPartAndAddToZooKeeper(part, ops); @@ -1360,8 +1364,10 @@ void StorageReplicatedMergeTree::mergeSelectingThread() String merged_name; - if ( !merger.selectPartsToMerge(parts, merged_name, MergeTreeDataMerger::NO_LIMIT, false, false, only_small, can_merge) - && !merger.selectPartsToMerge(parts, merged_name, MergeTreeDataMerger::NO_LIMIT, true, false, only_small, can_merge)) + size_t disk_space = DiskSpaceMonitor::getUnreservedFreeSpace(full_path); + + if ( !merger.selectPartsToMerge(parts, merged_name, disk_space, false, false, only_small, can_merge) + && !merger.selectPartsToMerge(parts, merged_name, disk_space, true, false, only_small, can_merge)) { break; } From 84c23517745d207c68617beb0b13f5c5f97a6b15 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Tue, 26 May 2015 15:44:34 +0300 Subject: [PATCH 15/21] remove old todo --- dbms/src/Interpreters/InterpreterCheckQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index 8bbe323c835..cc85f33c74c 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -12,7 +12,6 @@ InterpreterCheckQuery::InterpreterCheckQuery(DB::ASTPtr query_ptr_, DB::Context& BlockInputStreamPtr InterpreterCheckQuery::execute() { - /// @TODO ASTCheckQuery & alter = typeid_cast(*query_ptr); String & table_name = alter.table; String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database; From bff45a25fad7dac0adf37f871a3ae16a6fc0396d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 02:09:56 +0300 Subject: [PATCH 16/21] dbms: improved type checking in IN [#METR-16793]. --- dbms/src/Interpreters/Set.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 8c4a9dc10f6..7f0fb917923 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -325,8 +325,21 @@ static Field convertToType(const Field & src, const IDataType & type) + Field::Types::toString(src.getType()) + " literal at right"); } } + else + { + if (src.getType() == Field::Types::UInt64 + || src.getType() == Field::Types::Int64 + || src.getType() == Field::Types::Float64 + || src.getType() == Field::Types::Null + || (src.getType() == Field::Types::String + && !typeid_cast(&type) + && !typeid_cast(&type)) + || (src.getType() == Field::Types::Array + && !typeid_cast(&type))) + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " + + Field::Types::toString(src.getType()) + " literal at right"); + } - /// В остальных случаях, приведение типа не осуществляется. return src; } From 5b8008e2b1506e29d877621abe9405156e83b4c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 02:38:55 +0300 Subject: [PATCH 17/21] dbms: fixed error with tuples [#METR-13649]. --- dbms/include/DB/DataTypes/DataTypeTuple.h | 1 + .../queries/0_stateless/00171_array_of_tuple_remote.reference | 2 ++ dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.sql | 1 + 3 files changed, 4 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.reference create mode 100644 dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.sql diff --git a/dbms/include/DB/DataTypes/DataTypeTuple.h b/dbms/include/DB/DataTypes/DataTypeTuple.h index 5b0b45db805..db9dd5bdb09 100644 --- a/dbms/include/DB/DataTypes/DataTypeTuple.h +++ b/dbms/include/DB/DataTypes/DataTypeTuple.h @@ -138,6 +138,7 @@ public: { ColumnWithNameAndType col; col.column = elems[i]->createColumn(); + col.type = elems[i]->clone(); tuple_block.insert(col); } return new ColumnTuple(tuple_block); diff --git a/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.reference b/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.reference new file mode 100644 index 00000000000..cd4823e219f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.reference @@ -0,0 +1,2 @@ +[(1,4),(2,5),(3,6)] +[(1,4),(2,5),(3,6)] diff --git a/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.sql b/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.sql new file mode 100644 index 00000000000..5db737e8e8b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00171_array_of_tuple_remote.sql @@ -0,0 +1 @@ +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) FROM remote('127.0.0.{1,2}', system.one) ORDER BY rand(); From a9becd36ea876a2a5ae63ba37d0a73e2dbbfbd21 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 08:18:47 +0300 Subject: [PATCH 18/21] dbms: allowed constant expressions in IN [#METR-2944]. --- dbms/include/DB/Interpreters/Set.h | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- dbms/src/Interpreters/Set.cpp | 76 +++++++++++++------- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/dbms/include/DB/Interpreters/Set.h b/dbms/include/DB/Interpreters/Set.h index 4b6bfce1d75..e2dc2405279 100644 --- a/dbms/include/DB/Interpreters/Set.h +++ b/dbms/include/DB/Interpreters/Set.h @@ -283,7 +283,7 @@ public: * node - это список значений: 1, 2, 3 или список tuple-ов: (1, 2), (3, 4), (5, 6). * create_ordered_set - создавать ли вектор упорядоченных элементов. Нужен для работы индекса */ - void createFromAST(DataTypes & types, ASTPtr node, bool create_ordered_set); + void createFromAST(DataTypes & types, ASTPtr node, const Context & context, bool create_ordered_set); // Возвращает false, если превышено какое-нибудь ограничение, и больше не нужно вставлять. bool insertFromBlock(const Block & block, bool create_ordered_set = false); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 9307b811854..8906e36fbfa 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -954,7 +954,7 @@ void ExpressionAnalyzer::makeExplicitSet(ASTFunction * node, const Block & sampl ASTPtr ast_set_ptr = ast_set; ast_set->set = new Set(settings.limits); ast_set->is_explicit = true; - ast_set->set->createFromAST(set_element_types, elements_ast, create_ordered_set); + ast_set->set->createFromAST(set_element_types, elements_ast, context, create_ordered_set); arg = ast_set_ptr; } diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 7f0fb917923..8e74ee231ef 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -10,8 +10,12 @@ #include #include #include +#include #include +#include +#include + #include #include #include @@ -259,10 +263,10 @@ static Field convertToType(const Field & src, const IDataType & type) if (is_uint8 || is_uint16 || is_uint32 || is_uint64) { if (src.getType() == Field::Types::Int64) - throw Exception("Type mismatch in IN section: " + type.getName() + " at left, signed literal at right"); + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, signed at right"); if (src.getType() == Field::Types::Float64) - throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point literal at right"); + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point at right"); if (src.getType() == Field::Types::UInt64) { @@ -276,12 +280,12 @@ static Field convertToType(const Field & src, const IDataType & type) } throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " - + Field::Types::toString(src.getType()) + " literal at right"); + + Field::Types::toString(src.getType()) + " at right"); } else if (is_int8 || is_int16 || is_int32 || is_int64) { if (src.getType() == Field::Types::Float64) - throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point literal at right"); + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point at right"); if (src.getType() == Field::Types::UInt64) { @@ -308,7 +312,7 @@ static Field convertToType(const Field & src, const IDataType & type) } throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " - + Field::Types::toString(src.getType()) + " literal at right"); + + Field::Types::toString(src.getType()) + " at right"); } else if (is_float32 || is_float64) { @@ -322,7 +326,7 @@ static Field convertToType(const Field & src, const IDataType & type) return src; throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " - + Field::Types::toString(src.getType()) + " literal at right"); + + Field::Types::toString(src.getType()) + " at right"); } } else @@ -337,22 +341,54 @@ static Field convertToType(const Field & src, const IDataType & type) || (src.getType() == Field::Types::Array && !typeid_cast(&type))) throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " - + Field::Types::toString(src.getType()) + " literal at right"); + + Field::Types::toString(src.getType()) + " at right"); } return src; } -void Set::createFromAST(DataTypes & types, ASTPtr node, bool create_ordered_set) +/** Выполнить константное выражение (для элемента множества в IN). Весьма неоптимально. */ +static Field evaluateConstantExpression(ASTPtr & node, const Context & context) { - /** NOTE: - * На данный момент в секции IN не поддерживаются выражения (вызовы функций), кроме кортежей. - * То есть, например, не поддерживаются массивы. А по хорошему, хотелось бы поддерживать. - * Для этого можно сделать constant folding с помощью ExpressionAnalyzer/ExpressionActions. - * Но при этом, конечно же, не забыть про производительность работы с крупными множествами. - */ + ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer( + node, context, NamesAndTypesList{{ "_dummy", new DataTypeUInt8 }}).getConstActions(); + /// В блоке должен быть хотя бы один столбец, чтобы у него было известно число строк. + Block block_with_constants{{ new ColumnConstUInt8(1, 0), new DataTypeUInt8, "_dummy" }}; + + expr_for_constant_folding->execute(block_with_constants); + + if (!block_with_constants || block_with_constants.rows() == 0) + throw Exception("Logical error: empty block after evaluation constant expression for IN", ErrorCodes::LOGICAL_ERROR); + + String name = node->getColumnName(); + + if (!block_with_constants.has(name)) + throw Exception("Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS); + + const IColumn & result_column = *block_with_constants.getByName(name).column; + + if (!result_column.isConst()) + throw Exception("Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS); + + return result_column[0]; +} + + +static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context) +{ + if (ASTLiteral * lit = typeid_cast(node.get())) + return convertToType(lit->value, type); + else if (typeid_cast(node.get())) + return convertToType(evaluateConstantExpression(node, context), type); + else + throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); +} + + +void Set::createFromAST(DataTypes & types, ASTPtr node, const Context & context, bool create_ordered_set) +{ data_types = types; /// Засунем множество в блок. @@ -372,10 +408,7 @@ void Set::createFromAST(DataTypes & types, ASTPtr node, bool create_ordered_set) { if (data_types.size() == 1) { - if (ASTLiteral * lit = typeid_cast(&**it)) - block.getByPosition(0).column->insert(convertToType(lit->value, *data_types[0])); - else - throw Exception("Incorrect element of set. Must be literal.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + block.getByPosition(0).column->insert(extractValueFromNode(*it, *data_types[0], context)); } else if (ASTFunction * func = typeid_cast(&**it)) { @@ -388,16 +421,11 @@ void Set::createFromAST(DataTypes & types, ASTPtr node, bool create_ordered_set) for (size_t j = 0; j < tuple_size; ++j) { - if (ASTLiteral * lit = typeid_cast(&*func->arguments->children[j])) - block.getByPosition(j).column->insert(convertToType(lit->value, *data_types[j])); - else - throw Exception("Incorrect element of tuple in set. Must be literal.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + block.getByPosition(j).column->insert(extractValueFromNode(func->arguments->children[j], *data_types[j], context)); } } else throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET); - - /// NOTE: Потом можно реализовать возможность задавать константные выражения в множествах. } if (create_ordered_set) From 779d85afc90fe44059fd68e9a7de11289e548901 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 08:19:18 +0300 Subject: [PATCH 19/21] dbms: addition to prev. revision [#METR-2944]. --- dbms/src/Interpreters/Set.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 8e74ee231ef..6bb858a5aaf 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include From 7b76ff3118d5dc5920ea792e3775f6ebaac91cd0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 08:54:49 +0300 Subject: [PATCH 20/21] dbms: additions to prev. revision [#METR-2944]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 26 ++++++++++++------- dbms/src/Interpreters/Set.cpp | 19 ++++++++++++-- .../00172_constexprs_in_set.reference | 6 +++++ .../0_stateless/00172_constexprs_in_set.sql | 6 +++++ 4 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00172_constexprs_in_set.reference create mode 100644 dbms/tests/queries/0_stateless/00172_constexprs_in_set.sql diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8906e36fbfa..5b4d58b65f8 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -922,16 +922,24 @@ void ExpressionAnalyzer::makeExplicitSet(ASTFunction * node, const Block & sampl if (ASTFunction * set_func = typeid_cast(&*arg)) { - if (set_func->name != "tuple") - throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2). - ASTFunction * any_element = typeid_cast(&*set_func->arguments->children.at(0)); - if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) - single_value = true; + if (set_func->name == "tuple") + { + /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2). + ASTFunction * any_element = typeid_cast(&*set_func->arguments->children.at(0)); + if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) + single_value = true; + else + elements_ast = set_func->arguments; + } else - elements_ast = set_func->arguments; + { + if (set_element_types.size() >= 2) + throw Exception("Incorrect type of 2nd argument for function " + node->name + + ". Must be subquery or set of " + toString(set_element_types.size()) + "-element tuples.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + single_value = true; + } } else if (typeid_cast(&*arg)) { diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 6bb858a5aaf..08a6c6e0564 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include namespace DB @@ -234,7 +236,7 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set) */ static Field convertToType(const Field & src, const IDataType & type) { - if (type.behavesAsNumber()) + if (type.isNumeric()) { bool is_uint8 = false; bool is_uint16 = false; @@ -246,6 +248,8 @@ static Field convertToType(const Field & src, const IDataType & type) bool is_int64 = false; bool is_float32 = false; bool is_float64 = false; + bool is_date = false; + bool is_datetime = false; false || (is_uint8 = typeid_cast(&type)) @@ -257,7 +261,10 @@ static Field convertToType(const Field & src, const IDataType & type) || (is_int32 = typeid_cast(&type)) || (is_int64 = typeid_cast(&type)) || (is_float32 = typeid_cast(&type)) - || (is_float64 = typeid_cast(&type)); + || (is_float64 = typeid_cast(&type)) + || (is_date = typeid_cast(&type)) + || (is_datetime = typeid_cast(&type)) + ; if (is_uint8 || is_uint16 || is_uint32 || is_uint64) { @@ -327,6 +334,14 @@ static Field convertToType(const Field & src, const IDataType & type) throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " + Field::Types::toString(src.getType()) + " at right"); } + else if (is_date || is_datetime) + { + if (src.getType() != Field::Types::UInt64) + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " + + Field::Types::toString(src.getType()) + " at right"); + + return src; + } } else { diff --git a/dbms/tests/queries/0_stateless/00172_constexprs_in_set.reference b/dbms/tests/queries/0_stateless/00172_constexprs_in_set.reference new file mode 100644 index 00000000000..c06d3de5a56 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00172_constexprs_in_set.reference @@ -0,0 +1,6 @@ +14 3 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00172_constexprs_in_set.sql b/dbms/tests/queries/0_stateless/00172_constexprs_in_set.sql new file mode 100644 index 00000000000..3c438417053 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00172_constexprs_in_set.sql @@ -0,0 +1,6 @@ +SELECT sumIf(number, x), sum(x) FROM (SELECT number, number IN (0 + 1, 2 + 3, toUInt64(concat('8', ''))) AS x FROM system.numbers LIMIT 10); +SELECT toDate('2015-06-12') IN toDate('2015-06-12'); +SELECT toDate('2015-06-12') IN (toDate('2015-06-12')); +SELECT today() IN (toDate('2014-01-01'), toDate(now())); +SELECT - -1 IN (2 - 1); +SELECT - -1 IN (2 - 1, 3); From 26813667eb8248614c3d77e7e73c28b15a9d4c59 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2015 10:46:58 +0300 Subject: [PATCH 21/21] dbms: allowed to compare Date and DateTime with constant string [#METR-2944]. --- .../DB/Functions/FunctionsComparison.h | 214 +++++++++++++----- dbms/src/Interpreters/ExpressionActions.cpp | 3 +- ...e_date_time_with_constant_string.reference | 80 +++++++ ...compare_date_time_with_constant_string.sql | 83 +++++++ 4 files changed, 317 insertions(+), 63 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.reference create mode 100644 dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.sql diff --git a/dbms/include/DB/Functions/FunctionsComparison.h b/dbms/include/DB/Functions/FunctionsComparison.h index d459055367b..a901f363342 100644 --- a/dbms/include/DB/Functions/FunctionsComparison.h +++ b/dbms/include/DB/Functions/FunctionsComparison.h @@ -26,6 +26,10 @@ namespace DB * - даты; * - даты-с-временем; * внутри каждой группы, но не из разных групп. + * + * Исключение: можно сравнивать дату и дату-с-временем с константной строкой. Пример: EventDate = '2015-01-01'. + * + * TODO Массивы, кортежи. */ /** Игнорируем warning о сравнении signed и unsigned. @@ -391,9 +395,9 @@ public: private: template - bool executeNumRightType(Block & block, const ColumnNumbers & arguments, size_t result, const ColumnVector * col_left) + bool executeNumRightType(Block & block, size_t result, const ColumnVector * col_left, const IColumn * col_right_untyped) { - if (ColumnVector * col_right = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + if (const ColumnVector * col_right = typeid_cast *>(col_right_untyped)) { ColumnUInt8 * col_res = new ColumnUInt8; block.getByPosition(result).column = col_res; @@ -404,7 +408,7 @@ private: return true; } - else if (ColumnConst * col_right = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + else if (const ColumnConst * col_right = typeid_cast *>(col_right_untyped)) { ColumnUInt8 * col_res = new ColumnUInt8; block.getByPosition(result).column = col_res; @@ -420,9 +424,9 @@ private: } template - bool executeNumConstRightType(Block & block, const ColumnNumbers & arguments, size_t result, const ColumnConst * col_left) + bool executeNumConstRightType(Block & block, size_t result, const ColumnConst * col_left, const IColumn * col_right_untyped) { - if (ColumnVector * col_right = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + if (const ColumnVector * col_right = typeid_cast *>(col_right_untyped)) { ColumnUInt8 * col_res = new ColumnUInt8; block.getByPosition(result).column = col_res; @@ -433,7 +437,7 @@ private: return true; } - else if (ColumnConst * col_right = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + else if (const ColumnConst * col_right = typeid_cast *>(col_right_untyped)) { UInt8 res = 0; NumComparisonImpl>::constant_constant(col_left->getData(), col_right->getData(), res); @@ -448,41 +452,41 @@ private: } template - bool executeNumLeftType(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeNumLeftType(Block & block, size_t result, const IColumn * col_left_untyped, const IColumn * col_right_untyped) { - if (ColumnVector * col_left = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) + if (const ColumnVector * col_left = typeid_cast *>(col_left_untyped)) { - if ( executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left) - || executeNumRightType(block, arguments, result, col_left)) + if ( executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped) + || executeNumRightType(block, result, col_left, col_right_untyped)) return true; else - throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName() + throw Exception("Illegal column " + col_right_untyped->getName() + " of second argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } - else if (ColumnConst * col_left = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) + else if (const ColumnConst * col_left = typeid_cast *>(col_left_untyped)) { - if ( executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left) - || executeNumConstRightType(block, arguments, result, col_left)) + if ( executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped) + || executeNumConstRightType(block, result, col_left, col_right_untyped)) return true; else - throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName() + throw Exception("Illegal column " + col_right_untyped->getName() + " of second argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } @@ -490,17 +494,14 @@ private: return false; } - void executeString(Block & block, const ColumnNumbers & arguments, size_t result) + void executeString(Block & block, size_t result, const IColumn * c0, const IColumn * c1) { - IColumn * c0 = &*block.getByPosition(arguments[0]).column; - IColumn * c1 = &*block.getByPosition(arguments[1]).column; - - ColumnString * c0_string = typeid_cast(c0); - ColumnString * c1_string = typeid_cast(c1); - ColumnFixedString * c0_fixed_string = typeid_cast(c0); - ColumnFixedString * c1_fixed_string = typeid_cast(c1); - ColumnConstString * c0_const = typeid_cast(c0); - ColumnConstString * c1_const = typeid_cast(c1); + const ColumnString * c0_string = typeid_cast(c0); + const ColumnString * c1_string = typeid_cast(c1); + const ColumnFixedString * c0_fixed_string = typeid_cast(c0); + const ColumnFixedString * c1_fixed_string = typeid_cast(c1); + const ColumnConstString * c0_const = typeid_cast(c0); + const ColumnConstString * c1_const = typeid_cast(c1); using StringImpl = StringComparisonImpl>; @@ -559,13 +560,66 @@ private: c_res->getData()); else throw Exception("Illegal columns " - + block.getByPosition(arguments[0]).column->getName() + " and " - + block.getByPosition(arguments[1]).column->getName() + + c0->getName() + " and " + c1->getName() + " of arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } } + void executeDateOrDateTimeWithConstString(Block & block, size_t result, + const IColumn * col_left_untyped, const IColumn * col_right_untyped, + bool left_is_num, bool right_is_num) + { + /// Особый случай - сравнение дат и дат-с-временем со строковой константой. + const IColumn * column_date_or_datetime = left_is_num ? col_left_untyped : col_right_untyped; + const IColumn * column_string_untyped = !left_is_num ? col_left_untyped : col_right_untyped; + + bool is_date = false; + bool is_date_time = false; + + is_date = typeid_cast *>(column_date_or_datetime) + || typeid_cast *>(column_date_or_datetime); + + if (!is_date) + is_date_time = typeid_cast *>(column_date_or_datetime) + || typeid_cast *>(column_date_or_datetime); + + const ColumnConstString * column_string = typeid_cast(column_string_untyped); + + if (!column_string + || (!is_date && !is_date_time)) + throw Exception("Illegal columns " + col_left_untyped->getName() + " and " + col_right_untyped->getName() + + " of arguments of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + if (is_date) + { + DayNum_t date; + ReadBufferFromString in(column_string->getData()); + readDateText(date, in); + if (!in.eof()) + throw Exception("String is too long for Date: " + column_string->getData()); + + ColumnConst parsed_const_date(block.rowsInFirstColumn(), date); + executeNumLeftType(block, result, + left_is_num ? col_left_untyped : &parsed_const_date, + left_is_num ? &parsed_const_date : col_right_untyped); + } + else if (is_date_time) + { + time_t date_time; + ReadBufferFromString in(column_string->getData()); + readDateTimeText(date_time, in); + if (!in.eof()) + throw Exception("String is too long for DateTime: " + column_string->getData()); + + ColumnConst parsed_const_date_time(block.rowsInFirstColumn(), date_time); + executeNumLeftType(block, result, + left_is_num ? col_left_untyped : &parsed_const_date_time, + left_is_num ? &parsed_const_date_time : col_right_untyped); + } + } + public: /// Получить имя функции. String getName() const @@ -581,12 +635,36 @@ public: + toString(arguments.size()) + ", should be 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!( ( arguments[0]->isNumeric() && arguments[0]->behavesAsNumber() - && arguments[1]->isNumeric() && arguments[1]->behavesAsNumber()) - || ( (arguments[0]->getName() == "String" || arguments[0]->getName().substr(0, 11) == "FixedString") - && (arguments[1]->getName() == "String" || arguments[1]->getName().substr(0, 11) == "FixedString")) - || (arguments[0]->getName() == "Date" && arguments[1]->getName() == "Date") - || (arguments[0]->getName() == "DateTime" && arguments[1]->getName() == "DateTime"))) + bool left_is_date = false; + bool left_is_date_time = false; + bool left_is_string = false; + bool left_is_fixed_string = false; + + false + || (left_is_date = typeid_cast(arguments[0].get())) + || (left_is_date_time = typeid_cast(arguments[0].get())) + || (left_is_string = typeid_cast(arguments[0].get())) + || (left_is_fixed_string = typeid_cast(arguments[0].get())); + + bool right_is_date = false; + bool right_is_date_time = false; + bool right_is_string = false; + bool right_is_fixed_string = false; + + false + || (right_is_date = typeid_cast(arguments[1].get())) + || (right_is_date_time = typeid_cast(arguments[1].get())) + || (right_is_string = typeid_cast(arguments[1].get())) + || (right_is_fixed_string = typeid_cast(arguments[1].get())); + + if (!( (arguments[0]->behavesAsNumber() && arguments[1]->behavesAsNumber()) + || ((left_is_string || left_is_fixed_string) && (right_is_string || right_is_fixed_string)) + || (left_is_date && right_is_date) + || (left_is_date && right_is_string) /// Можно сравнивать дату и дату-с-временем с константной строкой. + || (left_is_string && right_is_date) + || (left_is_date_time && right_is_date_time) + || (left_is_date_time && right_is_string) + || (left_is_string && right_is_date_time))) throw Exception("Illegal types of arguments (" + arguments[0]->getName() + ", " + arguments[1]->getName() + ")" " of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -596,24 +674,36 @@ public: /// Выполнить функцию над блоком. void execute(Block & block, const ColumnNumbers & arguments, size_t result) { - if (block.getByPosition(arguments[0]).column->isNumeric()) + const IColumn * col_left_untyped = block.getByPosition(arguments[0]).column.get(); + const IColumn * col_right_untyped = block.getByPosition(arguments[1]).column.get(); + + bool left_is_num = col_left_untyped->isNumeric(); + bool right_is_num = col_right_untyped->isNumeric(); + + if (left_is_num && right_is_num) { - if (!( executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result) - || executeNumLeftType(block, arguments, result))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + if (!( executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped))) + throw Exception("Illegal column " + col_left_untyped->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } + else if (!left_is_num && !right_is_num) + { + executeString(block, result, col_left_untyped, col_right_untyped); + } else - executeString(block, arguments, result); + { + executeDateOrDateTimeWithConstString(block, result, col_left_untyped, col_right_untyped, left_is_num, right_is_num); + } } }; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index fbfdd349381..c6b78cfbc5a 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -124,12 +124,13 @@ void ExpressionAction::prepare(Block & sample_block) /// Если все аргументы и требуемые столбцы - константы, выполним функцию. if (all_const) { + size_t result_position = sample_block.columns(); + ColumnWithNameAndType new_column; new_column.name = result_name; new_column.type = result_type; sample_block.insert(new_column); - size_t result_position = sample_block.getPositionByName(result_name); function->execute(sample_block, arguments, prerequisites, result_position); /// Если получилась не константа, на всякий случай будем считать результат неизвестным. diff --git a/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.reference b/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.reference new file mode 100644 index 00000000000..7f8dca8f200 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.reference @@ -0,0 +1,80 @@ +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +0 +0 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.sql b/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.sql new file mode 100644 index 00000000000..c89292a252a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00173_compare_date_time_with_constant_string.sql @@ -0,0 +1,83 @@ +SELECT toDate('2015-02-03') = '2015-02-03'; +SELECT '2015-02-03' = toDate('2015-02-03'); +SELECT toDate('2015-02-03') = '2015-02-04'; +SELECT '2015-02-03' = toDate('2015-02-04'); +SELECT toDate('2015-02-03') < '2015-02-04'; +SELECT '2015-02-03' < toDate('2015-02-04'); +SELECT toDate('2015-02-03') > '2015-02-04'; +SELECT '2015-02-03' > toDate('2015-02-04'); +SELECT toDate('2015-02-03') <= '2015-02-04'; +SELECT '2015-02-03' <= toDate('2015-02-04'); +SELECT toDate('2015-02-03') >= '2015-02-04'; +SELECT '2015-02-03' >= toDate('2015-02-04'); +SELECT toDate('2015-02-05') < '2015-02-04'; +SELECT '2015-02-05' < toDate('2015-02-04'); +SELECT toDate('2015-02-05') > '2015-02-04'; +SELECT '2015-02-05' > toDate('2015-02-04'); +SELECT toDate('2015-02-05') <= '2015-02-04'; +SELECT '2015-02-05' <= toDate('2015-02-04'); +SELECT toDate('2015-02-05') >= '2015-02-04'; +SELECT '2015-02-05' >= toDate('2015-02-04'); + +SELECT materialize(toDate('2015-02-03')) = '2015-02-03'; +SELECT '2015-02-03' = materialize(toDate('2015-02-03')); +SELECT materialize(toDate('2015-02-03')) = '2015-02-04'; +SELECT '2015-02-03' = materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-03')) < '2015-02-04'; +SELECT '2015-02-03' < materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-03')) > '2015-02-04'; +SELECT '2015-02-03' > materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-03')) <= '2015-02-04'; +SELECT '2015-02-03' <= materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-03')) >= '2015-02-04'; +SELECT '2015-02-03' >= materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-05')) < '2015-02-04'; +SELECT '2015-02-05' < materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-05')) > '2015-02-04'; +SELECT '2015-02-05' > materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-05')) <= '2015-02-04'; +SELECT '2015-02-05' <= materialize(toDate('2015-02-04')); +SELECT materialize(toDate('2015-02-05')) >= '2015-02-04'; +SELECT '2015-02-05' >= materialize(toDate('2015-02-04')); + +SELECT toDateTime('2015-02-03 04:05:06') = '2015-02-03 04:05:06'; +SELECT '2015-02-03 04:05:06' = toDateTime('2015-02-03 04:05:06'); +SELECT toDateTime('2015-02-03 04:05:06') = '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' = toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') < '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' < toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') > '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' > toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') <= '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' <= toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') >= '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' >= toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 06:07:08') < '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' < toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 06:07:08') > '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' > toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 06:07:08') <= '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' <= toDateTime('2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 06:07:08') >= '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' >= toDateTime('2015-02-03 05:06:07'); + +SELECT materialize(toDateTime('2015-02-03 04:05:06')) = '2015-02-03 04:05:06'; +SELECT '2015-02-03 04:05:06' = materialize(toDateTime('2015-02-03 04:05:06')); +SELECT materialize(toDateTime('2015-02-03 04:05:06')) = '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' = materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 04:05:06')) < '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' < materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 04:05:06')) > '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' > materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 04:05:06')) <= '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' <= materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 04:05:06')) >= '2015-02-03 05:06:07'; +SELECT '2015-02-03 04:05:06' >= materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 06:07:08')) < '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' < materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 06:07:08')) > '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' > materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 06:07:08')) <= '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' <= materialize(toDateTime('2015-02-03 05:06:07')); +SELECT materialize(toDateTime('2015-02-03 06:07:08')) >= '2015-02-03 05:06:07'; +SELECT '2015-02-03 06:07:08' >= materialize(toDateTime('2015-02-03 05:06:07'));