From 300b7342e75127d3a2e2b0c12d40ad6e8efdb735 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 21 Jan 2015 14:39:48 +0300 Subject: [PATCH 01/43] dbms: dummy types and functions for external dictionaries. [#METR-13298] --- .../DB/Dictionaries/DictionaryFactory.h | 21 +++ dbms/include/DB/Dictionaries/FlatDictionary.h | 32 +++++ dbms/include/DB/Dictionaries/IDictionary.h | 22 +++ .../DB/Functions/FunctionsDictionaries.h | 130 ++++++++++++++++++ dbms/include/DB/Interpreters/Dictionaries.h | 31 +++++ dbms/src/Functions/FunctionsDictionaries.cpp | 3 +- dbms/src/Interpreters/Dictionaries.cpp | 58 ++++++++ 7 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 dbms/include/DB/Dictionaries/DictionaryFactory.h create mode 100644 dbms/include/DB/Dictionaries/FlatDictionary.h create mode 100644 dbms/include/DB/Dictionaries/IDictionary.h create mode 100644 dbms/src/Interpreters/Dictionaries.cpp diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h new file mode 100644 index 00000000000..c30eb12bd4d --- /dev/null +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class DictionaryFactory : public Singleton +{ +public: + DictionaryPtr create() const + { + return ext::make_unique(); + } +}; + +} diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h new file mode 100644 index 00000000000..c6d5e8f67e2 --- /dev/null +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class FlatDictionary : public IDictionary +{ +public: + FlatDictionary() = default; + + StringRef getString(const id_t id, const std::string & attribute_name) const override { + return { "", 0 }; + } + + UInt64 getUInt64(const id_t id, const std::string & attribute_name) const override { + 
return 0; + } + +private: + using value_t = std::pair; + using attribute_t = std::vector; + using attributes_t = std::map; + + attribute_t attributes; +}; + +} diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h new file mode 100644 index 00000000000..06bebea4a9a --- /dev/null +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class IDictionary +{ +public: + using id_t = std::uint64_t; + + virtual StringRef getString(const id_t id, const std::string & attribute_name) const = 0; + virtual UInt64 getUInt64(const id_t id, const std::string & attribute_name) const = 0; + + virtual ~IDictionary() = default; +}; + +using DictionaryPtr = std::unique_ptr; + +} diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index e4160feb246..3863a5966ca 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -778,4 +778,134 @@ public: } }; + +class FunctionDictGetString : public IFunction +{ +public: + static constexpr auto name = "dictGetString"; + + static IFunction * create(const Context & context) + { + return new FunctionDictGetString{context.getDictionaries()}; + }; + + FunctionDictGetString(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} + + String getName() const override { return name; } + +private: + DataTypePtr getReturnType(const DataTypes & arguments) const override + { + if (arguments.size() != 3) + throw Exception{ + "Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 3.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH + }; + + if (!typeid_cast(arguments[0].get())) + { + throw Exception{ + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + if 
(!typeid_cast(arguments[1].get())) + { + throw Exception{ + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + const auto id_arg = arguments[2].get(); + if (!typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg)) + { + throw Exception{ + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + return new DataTypeString; + } + + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) + { + const auto dict_name_col = typeid_cast *>(block.getByPosition(arguments[0]).column.get()); + if (!dict_name_col) + throw Exception{ + "First argument of function " + getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + + const auto attr_name_col = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); + if (!attr_name_col) + throw Exception{ + "Second argument of function " + getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + const auto & attr_name = attr_name_col->getData(); + + const auto id_col = block.getByPosition(arguments[2]).column.get(); + if (!execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col)) + { + throw Exception{ + "Third argument of function " + getName() + " must be integral", + ErrorCodes::ILLEGAL_COLUMN + }; + } + } + + 
template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const std::string & attr_name, const IColumn * const id_col_untyped) + { + if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + const auto out = new ColumnString; + block.getByPosition(result).column = out; + + for (const auto & id : id_col->getData()) + { + const auto string_ref = dictionary->getString(id, attr_name); + out->insertData(string_ref.data, string_ref.size); + } + + return true; + } + else if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + block.getByPosition(result).column = new ColumnConst{ + id_col->size(), + dictionary->getString(id_col->getData(), attr_name).toString() + }; + + return true; + }; + + return false; + } + + const Dictionaries & dictionaries; +}; + } diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index a202c8b1cbd..5a5bbb72d87 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -11,6 +12,9 @@ #include #include +#include +#include + namespace DB { @@ -26,6 +30,7 @@ private: MultiVersion tech_data_hierarchy; MultiVersion categories_hierarchy; MultiVersion regions_names; + std::unordered_map>> external_dictionaries; /// Периодичность обновления справочников, в секундах. int reload_period; @@ -116,10 +121,24 @@ private: was_exception = true; } + try + { + reloadExternalDictionaries(); + } + catch (...) + { + handleException(); + was_exception = true; + } + + if (!was_exception) LOG_INFO(log, "Loaded dictionaries."); } + + void reloadExternalDictionaries(); + /// Обновляет каждые reload_period секунд. 
void reloadPeriodically() { @@ -167,6 +186,18 @@ public: { return regions_names.get(); } + + MultiVersion::Version getExternalDictionary(const std::string & name) const + { + const auto it = external_dictionaries.find(name); + if (it == std::end(external_dictionaries)) + throw Exception{ + "No such dictionary: " + name, + ErrorCodes::BAD_ARGUMENTS + }; + + return it->second->get(); + } }; } diff --git a/dbms/src/Functions/FunctionsDictionaries.cpp b/dbms/src/Functions/FunctionsDictionaries.cpp index 14f67389b8b..fc05edd9e89 100644 --- a/dbms/src/Functions/FunctionsDictionaries.cpp +++ b/dbms/src/Functions/FunctionsDictionaries.cpp @@ -25,6 +25,7 @@ void registerFunctionsDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } - + } diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp new file mode 100644 index 00000000000..72fbbc9c3f2 --- /dev/null +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -0,0 +1,58 @@ +#include +#include +#include + + +namespace DB +{ + +namespace +{ + template struct release + { + void operator()(const T * const ptr) { ptr->release(); } + }; + template using config_ptr_t = std::unique_ptr>; +}; + +void Dictionaries::reloadExternalDictionaries() +{ + const auto config_path = Poco::Util::Application::instance().config().getString("dictionaries_config"); + const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; + + /// get all dictionaries' definitions + Poco::Util::AbstractConfiguration::Keys keys; + config->keys(keys); + + /// for each dictionary defined in xml config + for (const auto & key : keys) + { + if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) + { + /// @todo maybe output a warning + continue; + } + + std::cout << key << std::endl; + const auto & prefix = key + '.'; + + const auto & name = config->getString(prefix + "name"); + if (name.empty()) + { + /// @todo 
handle error, dictionary name cannot be empty + } + + auto dict_ptr = DictionaryFactory::instance().create(); + const auto it = external_dictionaries.find(name); + if (it == std::end(external_dictionaries)) + { + external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + } + else + { + it->second->set(dict_ptr.release()); + } + } +}; + +} From 92664ed612b4354edf1a7fe27263d3b5568a4824 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 22 Jan 2015 17:32:38 +0300 Subject: [PATCH 02/43] dbms: sparse array implementation of FlatDictionary. [#METR-13298] --- .../DB/Dictionaries/DictionaryFactory.h | 36 ++- dbms/include/DB/Dictionaries/FlatDictionary.h | 261 +++++++++++++++++- dbms/include/DB/Dictionaries/IDictionary.h | 69 ++++- .../DB/Functions/FunctionsDictionaries.h | 142 ++++++++++ dbms/include/DB/Interpreters/Dictionaries.h | 8 +- dbms/src/Functions/FunctionsDictionaries.cpp | 8 + dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/Interpreters/Dictionaries.cpp | 24 +- 8 files changed, 522 insertions(+), 28 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index c30eb12bd4d..0249715c24e 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -12,9 +11,40 @@ namespace DB class DictionaryFactory : public Singleton { public: - DictionaryPtr create() const + DictionaryPtr create(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix, + const Context & context) const { - return ext::make_unique(); + const auto & layout_prefix = config_prefix + "layout."; + + auto dict_struct = DictionaryStructure::fromXML(config, config_prefix + "structure"); + + if (config.has(layout_prefix + "flat")) + { + return ext::make_unique(dict_struct, config, config_prefix, context); + } + else if (config.has(layout_prefix + "hashed")) + 
{ + throw Exception{ + "Dictionary of type 'hashed' is not yet implemented", + ErrorCodes::NOT_IMPLEMENTED + }; + } + else if (config.has(layout_prefix + "cache")) + { + const auto size = config.getInt(layout_prefix + "cache.size", 0); + if (size == 0) + throw Exception{ + "Dictionary of type 'cache' cannot have size of 0 bytes", + ErrorCodes::TOO_SMALL_BUFFER_SIZE + }; + + throw Exception{ + "Dictionary of type 'cache' is not yet implemented", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + throw Exception{"No dictionary type specified", ErrorCodes::BAD_ARGUMENTS}; } }; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index c6d5e8f67e2..da9cbf139f7 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -1,6 +1,11 @@ #pragma once #include +#include +#include +#include +#include +#include #include #include #include @@ -8,25 +13,259 @@ namespace DB { +const auto max_array_size = 500000; +const auto max_block_size = 8192; + +/// @todo manage arrays using std::vector or PODArray, start with an initial size, expand up to max_array_size class FlatDictionary : public IDictionary { public: - FlatDictionary() = default; + FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::XMLConfiguration & config, + const std::string & config_prefix, const Context & context) + { + for (const auto & attribute : dict_struct.attributes) + { + attributes.emplace(attribute.name, + createAttributeWithType(getAttributeTypeByName(attribute.type), attribute.null_value)); - StringRef getString(const id_t id, const std::string & attribute_name) const override { - return { "", 0 }; - } + if (attribute.hierarchical) + hierarchical_attribute = &attributes[attribute.name]; + } - UInt64 getUInt64(const id_t id, const std::string & attribute_name) const override { - return 0; + if (config.has(config_prefix + "source.file")) + { + const auto & file_name = 
config.getString(config_prefix + "source.file.path"); + const auto & format = config.getString(config_prefix + "source.file.format"); + + ReadBufferFromFile in{file_name}; + auto sample_block = createSampleBlock(dict_struct, context); + auto stream = context.getFormatFactory().getInput( + format, in, sample_block, max_block_size, context.getDataTypeFactory()); + + while (const auto block = stream->read()) + { + const auto & id_column = *block.getByPosition(0).column; + + for (const auto attribute_idx : ext::range(1, attributes.size())) + { + const auto & attribute_column = *block.getByPosition(attribute_idx).column; + auto & attribute = attributes[dict_struct.attributes[attribute_idx - 1].name]; + + for (const auto row_idx : ext::range(1, attribute_column.size())) + setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); + } + } + } } -private: - using value_t = std::pair; - using attribute_t = std::vector; - using attributes_t = std::map; + UInt64 getUInt64(const id_t id, const std::string & attribute_name) const override + { + const auto & attribute = findAttribute(attribute_name); - attribute_t attributes; + if (attribute.type != attribute_type::uint64) + throw Exception{ + "Type mismatch: attribute " + attribute_name + " has a type different from UInt64", + ErrorCodes::TYPE_MISMATCH + }; + + if (id < max_array_size) + return attribute.uint64_array[id]; + + return attribute.uint64_null_value; + } + + StringRef getString(const id_t id, const std::string & attribute_name) const override + { + const auto & attribute = findAttribute(attribute_name); + + if (attribute.type != attribute_type::string) + throw Exception{ + "Type mismatch: attribute " + attribute_name + " has a type different from String", + ErrorCodes::TYPE_MISMATCH + }; + + if (id < max_array_size) + return { attribute.string_array[id].data(), attribute.string_array[id].size() }; + + return { attribute.string_null_value.data(), attribute.string_null_value.size() }; + } + 
+private: + enum class attribute_type + { + uint8, + uint16, + uint32, + uint64, + int8, + int16, + int32, + int64, + string + }; + + struct attribute_t + { + attribute_type type; + UInt8 uint8_null_value; + UInt16 uint16_null_value; + UInt32 uint32_null_value; + UInt64 uint64_null_value; + Int8 int8_null_value; + Int16 int16_null_value; + Int32 int32_null_value; + Int64 int64_null_value; + String string_null_value; + std::unique_ptr uint8_array; + std::unique_ptr uint16_array; + std::unique_ptr uint32_array; + std::unique_ptr uint64_array; + std::unique_ptr int8_array; + std::unique_ptr int16_array; + std::unique_ptr int32_array; + std::unique_ptr int64_array; + std::unique_ptr string_array; + }; + + using attributes_t = std::map; + + attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) + { + attribute_t attr{type}; + + switch (type) + { + case attribute_type::uint8: + attr.uint8_null_value = DB::parse(null_value); + attr.uint8_array.reset(new UInt8[max_array_size]); + std::fill(attr.uint8_array.get(), attr.uint8_array.get() + max_array_size, attr.uint8_null_value); + break; + case attribute_type::uint16: + attr.uint16_null_value = DB::parse(null_value); + attr.uint16_array.reset(new UInt16[max_array_size]); + std::fill(attr.uint16_array.get(), attr.uint16_array.get() + max_array_size, attr.uint16_null_value); + break; + case attribute_type::uint32: + attr.uint32_null_value = DB::parse(null_value); + attr.uint32_array.reset(new UInt32[max_array_size]); + std::fill(attr.uint32_array.get(), attr.uint32_array.get() + max_array_size, attr.uint32_null_value); + break; + case attribute_type::uint64: + attr.uint64_null_value = DB::parse(null_value); + attr.uint64_array.reset(new UInt64[max_array_size]); + std::fill(attr.uint64_array.get(), attr.uint64_array.get() + max_array_size, attr.uint64_null_value); + break; + case attribute_type::int8: + attr.int8_null_value = DB::parse(null_value); + attr.int8_array.reset(new 
Int8[max_array_size]); + std::fill(attr.int8_array.get(), attr.int8_array.get() + max_array_size, attr.int8_null_value); + break; + case attribute_type::int16: + attr.int16_null_value = DB::parse(null_value); + attr.int16_array.reset(new Int16[max_array_size]); + std::fill(attr.int16_array.get(), attr.int16_array.get() + max_array_size, attr.int16_null_value); + break; + case attribute_type::int32: + attr.int32_null_value = DB::parse(null_value); + attr.int32_array.reset(new Int32[max_array_size]); + std::fill(attr.int32_array.get(), attr.int32_array.get() + max_array_size, attr.int32_null_value); + break; + case attribute_type::int64: + attr.int64_null_value = DB::parse(null_value); + attr.int64_array.reset(new Int64[max_array_size]); + std::fill(attr.int64_array.get(), attr.int64_array.get() + max_array_size, attr.int64_null_value); + break; + case attribute_type::string: + attr.string_null_value = null_value; + attr.string_array.reset(new String[max_array_size]); + std::fill(attr.string_array.get(), attr.string_array.get() + max_array_size, attr.string_null_value); + break; + } + + return attr; + } + + attribute_type getAttributeTypeByName(const std::string & type) + { + static const std::unordered_map dictionary{ + { "UInt8", attribute_type::uint8 }, + { "UInt16", attribute_type::uint16 }, + { "UInt32", attribute_type::uint32 }, + { "UInt64", attribute_type::uint64 }, + { "Int8", attribute_type::int8 }, + { "Int16", attribute_type::int16 }, + { "Int32", attribute_type::int32 }, + { "Int64", attribute_type::int64 }, + { "String", attribute_type::string }, + }; + + const auto it = dictionary.find(type); + if (it != std::end(dictionary)) + return it->second; + + throw Exception{ + "Unknown type " + type, + ErrorCodes::UNKNOWN_TYPE + }; + } + + void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value) + { + if (id >= max_array_size) + throw Exception{ + "Identifier should be less than " + toString(max_array_size), + 
ErrorCodes::ARGUMENT_OUT_OF_BOUND + }; + + switch (attribute.type) + { + case attribute_type::uint8: attribute.uint8_array[id] = value.get(); break; + case attribute_type::uint16: attribute.uint16_array[id] = value.get(); break; + case attribute_type::uint32: attribute.uint32_array[id] = value.get(); break; + case attribute_type::uint64: attribute.uint64_array[id] = value.get(); break; + case attribute_type::int8: attribute.int8_array[id] = value.get(); break; + case attribute_type::int16: attribute.int16_array[id] = value.get(); break; + case attribute_type::int32: attribute.int32_array[id] = value.get(); break; + case attribute_type::int64: attribute.int64_array[id] = value.get(); break; + case attribute_type::string: attribute.string_array[id] = value.get(); break; + } + } + + static Block createSampleBlock(const DictionaryStructure & dict_struct, const Context & context) + { + Block block{ + ColumnWithNameAndType{ + new ColumnUInt64, + new DataTypeUInt64, + dict_struct.id_name + } + }; + + for (const auto & attribute : dict_struct.attributes) + { + const auto & type = context.getDataTypeFactory().get(attribute.type); + block.insert(ColumnWithNameAndType{ + type->createColumn(), type, attribute.name + }); + } + + return block; + } + + const attribute_t & findAttribute(const std::string & attribute_name) const + { + const auto it = attributes.find(attribute_name); + if (it == std::end(attributes)) + throw Exception{ + "No such attribute '" + attribute_name + "'", + ErrorCodes::BAD_ARGUMENTS + }; + + return it->second; + } + + attributes_t attributes; + const attribute_t * hierarchical_attribute = nullptr; }; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 06bebea4a9a..c8611e23c23 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -11,12 +12,78 @@ class IDictionary public: using id_t = 
std::uint64_t; - virtual StringRef getString(const id_t id, const std::string & attribute_name) const = 0; virtual UInt64 getUInt64(const id_t id, const std::string & attribute_name) const = 0; + virtual StringRef getString(const id_t id, const std::string & attribute_name) const = 0; virtual ~IDictionary() = default; }; using DictionaryPtr = std::unique_ptr; +struct DictionaryAttribute +{ + std::string name; + std::string type; + std::string null_value; + bool hierarchical; + bool injective; +}; + +struct DictionaryStructure +{ + std::string id_name; + std::vector attributes; + + static DictionaryStructure fromXML(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix) + { + const auto & id_name = config.getString(config_prefix + ".id.name"); + if (id_name.empty()) + throw Exception{ + "No 'id' specified for dictionary", + ErrorCodes::BAD_ARGUMENTS + }; + + DictionaryStructure result{id_name}; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + auto has_hierarchy = false; + for (const auto & key : keys) + { + if (0 != strncmp(key.data(), "attribute", strlen("attribute"))) + continue; + + const auto & prefix = config_prefix + '.' 
+ key + '.'; + const auto & name = config.getString(prefix + "name"); + const auto & type = config.getString(prefix + "type"); + const auto & null_value = config.getString(prefix + "null_value"); + const auto hierarchical = config.getBool(prefix + "hierarchical", false); + const auto injective = config.getBool(prefix + "injective", false); + if (name.empty() || type.empty()) + throw Exception{ + "Properties 'name' and 'type' of an attribute cannot be empty", + ErrorCodes::BAD_ARGUMENTS + }; + + if (has_hierarchy && hierarchical) + throw Exception{ + "Only one hierarchical attribute supported", + ErrorCodes::BAD_ARGUMENTS + }; + + has_hierarchy = has_hierarchy || hierarchical; + + result.attributes.emplace_back(DictionaryAttribute{name, type, null_value, hierarchical, injective}); + } + + if (result.attributes.empty()) + throw Exception{ + "Dictionary has no attributes defined", + ErrorCodes::BAD_ARGUMENTS + }; + + return result; + } +}; + } diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index 3863a5966ca..2f0559f55b9 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -908,4 +908,146 @@ private: const Dictionaries & dictionaries; }; + +template +class FunctionDictGetInteger: public IFunction +{ +public: + static const std::string name; + + static IFunction * create(const Context & context) + { + return new FunctionDictGetInteger{context.getDictionaries()}; + }; + + FunctionDictGetInteger(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} + + String getName() const override { return name; } + +private: + DataTypePtr getReturnType(const DataTypes & arguments) const override + { + if (arguments.size() != 3) + throw Exception{ + "Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 3.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH + }; + + if 
(!typeid_cast(arguments[0].get())) + { + throw Exception{ + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + if (!typeid_cast(arguments[1].get())) + { + throw Exception{ + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + const auto id_arg = arguments[2].get(); + if (!typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg)) + { + throw Exception{ + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + return new typename DataTypeFromFieldType::Type; + } + + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) + { + const auto dict_name_col = typeid_cast *>(block.getByPosition(arguments[0]).column.get()); + if (!dict_name_col) + throw Exception{ + "First argument of function " + getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + + const auto attr_name_col = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); + if (!attr_name_col) + throw Exception{ + "Second argument of function " + getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + const auto & attr_name = attr_name_col->getData(); + + const auto id_col = block.getByPosition(arguments[2]).column.get(); + if (!execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, 
result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col)) + { + throw Exception{ + "Third argument of function " + getName() + " must be integral", + ErrorCodes::ILLEGAL_COLUMN + }; + } + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const std::string & attr_name, const IColumn * const id_col_untyped) + { + if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + const auto out = new ColumnVector; + block.getByPosition(result).column = out; + + for (const auto & id : id_col->getData()) + out->insert(dictionary->getUInt64(id, attr_name)); + + return true; + } + else if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + block.getByPosition(result).column = new ColumnConst{ + id_col->size(), + static_cast(dictionary->getUInt64(id_col->getData(), attr_name)) + }; + + return true; + }; + + return false; + } + + const Dictionaries & dictionaries; +}; + +template +const std::string FunctionDictGetInteger::name = "dictGet" + TypeName::get(); + + +using FunctionDictGetUInt8 = FunctionDictGetInteger; +using FunctionDictGetUInt16 = FunctionDictGetInteger; +using FunctionDictGetUInt32 = FunctionDictGetInteger; +using FunctionDictGetUInt64 = FunctionDictGetInteger; +using FunctionDictGetInt8 = FunctionDictGetInteger; +using FunctionDictGetInt16 = FunctionDictGetInteger; +using FunctionDictGetInt32 = FunctionDictGetInteger; +using FunctionDictGetInt64 = FunctionDictGetInteger; + + } diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 5a5bbb72d87..b3d3d016504 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -13,7 +13,6 @@ #include #include -#include namespace DB @@ -21,6 +20,8 @@ namespace DB using Poco::SharedPtr; +class Context; + /// Словари Метрики, которые могут использоваться в функциях. 
class Dictionaries @@ -32,6 +33,7 @@ private: MultiVersion regions_names; std::unordered_map>> external_dictionaries; + const Context & context; /// Периодичность обновления справочников, в секундах. int reload_period; @@ -153,8 +155,8 @@ private: public: /// Справочники будут обновляться в отдельном потоке, каждые reload_period секунд. - Dictionaries(int reload_period_ = 3600) - : reload_period(reload_period_), + Dictionaries(const Context & context, int reload_period_ = 3600) + : context(context), reload_period(reload_period_), log(&Logger::get("Dictionaries")) { reloadImpl(); diff --git a/dbms/src/Functions/FunctionsDictionaries.cpp b/dbms/src/Functions/FunctionsDictionaries.cpp index fc05edd9e89..fa8cf5d5b1d 100644 --- a/dbms/src/Functions/FunctionsDictionaries.cpp +++ b/dbms/src/Functions/FunctionsDictionaries.cpp @@ -25,6 +25,14 @@ void registerFunctionsDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 1a2e66b6887..1254d94bb81 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -496,7 +496,7 @@ const Dictionaries & Context::getDictionaries() const Poco::ScopedLock lock(shared->mutex); if (!shared->dictionaries) - shared->dictionaries = new Dictionaries; + shared->dictionaries = new Dictionaries{*this}; return *shared->dictionaries; } diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 72fbbc9c3f2..487704ed7b4 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -18,6 +18,9 @@ namespace void 
Dictionaries::reloadExternalDictionaries() { const auto config_path = Poco::Util::Application::instance().config().getString("dictionaries_config"); + if (config_path.empty()) + return; + const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; /// get all dictionaries' definitions @@ -29,28 +32,31 @@ void Dictionaries::reloadExternalDictionaries() { if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) { - /// @todo maybe output a warning + LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); continue; } - std::cout << key << std::endl; const auto & prefix = key + '.'; const auto & name = config->getString(prefix + "name"); if (name.empty()) { - /// @todo handle error, dictionary name cannot be empty + LOG_WARNING(log, "dictionary name cannot be empty"); + continue; } - auto dict_ptr = DictionaryFactory::instance().create(); - const auto it = external_dictionaries.find(name); - if (it == std::end(external_dictionaries)) + try { - external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); + const auto it = external_dictionaries.find(name); + if (it == std::end(external_dictionaries)) + external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + else + it->second->set(dict_ptr.release()); } - else + catch (const Exception &) { - it->second->set(dict_ptr.release()); + handleException(); } } }; From 616f8d03dc4f2dcd691fa2df2268defe17772c25 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 22 Jan 2015 17:33:29 +0300 Subject: [PATCH 03/43] dbms: Server: follow style guidelines --- dbms/src/Server/Server.cpp | 2 +- dbms/src/Server/Server.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 394e796e205..64f319032db 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -238,7 +238,7 @@ 
UsersConfigReloader::~UsersConfigReloader() quit = true; thread.join(); } - catch(...) + catch (...) { tryLogCurrentException("~UsersConfigReloader"); } diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h index f29440f4b02..c2847e53464 100644 --- a/dbms/src/Server/Server.h +++ b/dbms/src/Server/Server.h @@ -49,7 +49,7 @@ public: std::unique_ptr olap_converter; protected: - void initialize(Application& self) + void initialize(Application & self) { Daemon::initialize(self); logger().information("starting up"); @@ -61,7 +61,7 @@ protected: Daemon::uninitialize(); } - int main(const std::vector& args); + int main(const std::vector & args); }; } From a8c2d66076e14cf7860ad3ba90e774a40d8e3f7e Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 22 Jan 2015 17:41:05 +0300 Subject: [PATCH 04/43] dbms: fix error in populating attributes. [#METR-13298] --- dbms/include/DB/Dictionaries/FlatDictionary.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index da9cbf139f7..2e1a29ac03c 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -51,7 +51,7 @@ public: const auto & attribute_column = *block.getByPosition(attribute_idx).column; auto & attribute = attributes[dict_struct.attributes[attribute_idx - 1].name]; - for (const auto row_idx : ext::range(1, attribute_column.size())) + for (const auto row_idx : ext::range(0, id_column.size())) setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); } } From 419d517867e46fbce5f2343cf207ea398476ec64 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 26 Jan 2015 18:27:51 +0300 Subject: [PATCH 05/43] dbms: add abstractions for dictionary sources --- dbms/include/DB/Core/StringRef.h | 2 +- .../DB/Dictionaries/DictionaryFactory.h | 11 +- .../DB/Dictionaries/DictionarySourceFactory.h | 69 +++++++++++ 
.../DB/Dictionaries/DictionaryStructure.h | 77 +++++++++++++ .../DB/Dictionaries/FileDictionarySource.h | 56 +++++++++ dbms/include/DB/Dictionaries/FlatDictionary.h | 95 +++++++-------- dbms/include/DB/Dictionaries/IDictionary.h | 69 +---------- .../DB/Dictionaries/IDictionarySource.h | 23 ++++ dbms/include/DB/Interpreters/Dictionaries.h | 40 +++++-- dbms/src/Interpreters/Dictionaries.cpp | 108 ++++++++++++------ libs/libmysqlxx/include/mysqlxx/Pool.h | 26 ++++- 11 files changed, 405 insertions(+), 171 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/DictionarySourceFactory.h create mode 100644 dbms/include/DB/Dictionaries/DictionaryStructure.h create mode 100644 dbms/include/DB/Dictionaries/FileDictionarySource.h create mode 100644 dbms/include/DB/Dictionaries/IDictionarySource.h diff --git a/dbms/include/DB/Core/StringRef.h b/dbms/include/DB/Core/StringRef.h index 8ab432210a9..e7ca944e33a 100644 --- a/dbms/include/DB/Core/StringRef.h +++ b/dbms/include/DB/Core/StringRef.h @@ -21,7 +21,7 @@ struct StringRef StringRef(const char * data_, size_t size_) : data(data_), size(size_) {} StringRef(const unsigned char * data_, size_t size_) : data(reinterpret_cast(data_)), size(size_) {} StringRef(const std::string & s) : data(s.data()), size(s.size()) {} - StringRef() {} + StringRef() = default; std::string toString() const { return std::string(data, size); } }; diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 0249715c24e..1b531cdc332 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -14,13 +16,16 @@ public: DictionaryPtr create(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix, const Context & context) const { - const auto & layout_prefix = config_prefix + "layout."; - auto dict_struct = 
DictionaryStructure::fromXML(config, config_prefix + "structure"); + auto source_ptr = DictionarySourceFactory::instance().create( + config, config_prefix + "source.", dict_struct, context); + + const auto & layout_prefix = config_prefix + "layout."; + if (config.has(layout_prefix + "flat")) { - return ext::make_unique(dict_struct, config, config_prefix, context); + return ext::make_unique(dict_struct, config, config_prefix, std::move(source_ptr)); } else if (config.has(layout_prefix + "hashed")) { diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h new file mode 100644 index 00000000000..91073abc964 --- /dev/null +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +Block createSampleBlock(const DictionaryStructure & dict_struct, const Context & context) +{ + Block block{ + ColumnWithNameAndType{ + new ColumnUInt64, + new DataTypeUInt64, + dict_struct.id_name + } + }; + + for (const auto & attribute : dict_struct.attributes) + { + const auto & type = context.getDataTypeFactory().get(attribute.type); + block.insert(ColumnWithNameAndType{ + type->createColumn(), type, attribute.name + }); + } + + return block; +} + +} + +class DictionarySourceFactory : public Singleton +{ +public: + DictionarySourcePtr create(const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DictionaryStructure & dict_struct, + const Context & context) const + { + auto sample_block = createSampleBlock(dict_struct, context); + + if (config.has(config_prefix + "file")) + { + const auto & filename = config.getString(config_prefix + "file.path"); + const auto & format = config.getString(config_prefix + "file.format"); + return ext::make_unique(filename, format, sample_block, context); + } + else if (config.has(config_prefix + "mysql")) + { + throw 
Exception{ + "source.mysql not yet implemented", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + throw Exception{ + "unsupported source type" + }; + } +}; + +} diff --git a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h new file mode 100644 index 00000000000..9720f5d87d8 --- /dev/null +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +struct DictionaryAttribute +{ + std::string name; + std::string type; + std::string null_value; + bool hierarchical; + bool injective; +}; + +struct DictionaryStructure +{ + std::string id_name; + std::vector attributes; + + static DictionaryStructure fromXML(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix) + { + const auto & id_name = config.getString(config_prefix + ".id.name"); + if (id_name.empty()) + throw Exception{ + "No 'id' specified for dictionary", + ErrorCodes::BAD_ARGUMENTS + }; + + DictionaryStructure result{id_name}; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + auto has_hierarchy = false; + for (const auto & key : keys) + { + if (0 != strncmp(key.data(), "attribute", strlen("attribute"))) + continue; + + const auto & prefix = config_prefix + '.' 
+ key + '.'; + const auto & name = config.getString(prefix + "name"); + const auto & type = config.getString(prefix + "type"); + const auto & null_value = config.getString(prefix + "null_value"); + const auto hierarchical = config.getBool(prefix + "hierarchical", false); + const auto injective = config.getBool(prefix + "injective", false); + if (name.empty() || type.empty()) + throw Exception{ + "Properties 'name' and 'type' of an attribute cannot be empty", + ErrorCodes::BAD_ARGUMENTS + }; + + if (has_hierarchy && hierarchical) + throw Exception{ + "Only one hierarchical attribute supported", + ErrorCodes::BAD_ARGUMENTS + }; + + has_hierarchy = has_hierarchy || hierarchical; + + result.attributes.emplace_back(DictionaryAttribute{name, type, null_value, hierarchical, injective}); + } + + if (result.attributes.empty()) + throw Exception{ + "Dictionary has no attributes defined", + ErrorCodes::BAD_ARGUMENTS + }; + + return result; + } +}; + +} diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h new file mode 100644 index 00000000000..0218a663979 --- /dev/null +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +const auto max_block_size = 8192; + +class FileDictionarySource final : public IDictionarySource +{ +public: + FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, + const Context & context) + : filename{filename}, format{format}, sample_block{sample_block}, context(context) {} + +private: + BlockInputStreamPtr loadAll() override + { + in_ptr = ext::make_unique(filename); + return context.getFormatFactory().getInput( + format, *in_ptr, sample_block, max_block_size, context.getDataTypeFactory()); + } + + BlockInputStreamPtr loadId(const std::uint64_t id) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + 
BlockInputStreamPtr loadIds(const std::vector ids) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + void reset() override + { + in_ptr.reset(nullptr); + } + + const std::string filename; + const std::string format; + Block sample_block; + const Context & context; + + std::unique_ptr in_ptr; +}; + +} diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 2e1a29ac03c..9e3a8cf8d08 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -1,10 +1,7 @@ #pragma once +#include #include -#include -#include -#include -#include #include #include #include @@ -13,15 +10,16 @@ namespace DB { +const auto initial_array_size = 128; const auto max_array_size = 500000; -const auto max_block_size = 8192; /// @todo manage arrays using std::vector or PODArray, start with an initial size, expand up to max_array_size -class FlatDictionary : public IDictionary +class FlatDictionary final : public IDictionary { public: FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::XMLConfiguration & config, - const std::string & config_prefix, const Context & context) + const std::string & config_prefix, DictionarySourcePtr source_ptr) + : source_ptr{std::move(source_ptr)} { for (const auto & attribute : dict_struct.attributes) { @@ -32,32 +30,27 @@ public: hierarchical_attribute = &attributes[attribute.name]; } - if (config.has(config_prefix + "source.file")) + auto stream = this->source_ptr->loadAll(); + + while (const auto block = stream->read()) { - const auto & file_name = config.getString(config_prefix + "source.file.path"); - const auto & format = config.getString(config_prefix + "source.file.format"); + const auto & id_column = *block.getByPosition(0).column; - ReadBufferFromFile in{file_name}; - auto sample_block = createSampleBlock(dict_struct, context); - auto stream = context.getFormatFactory().getInput( 
- format, in, sample_block, max_block_size, context.getDataTypeFactory()); - - while (const auto block = stream->read()) + for (const auto attribute_idx : ext::range(1, attributes.size() + 1)) /* column 0 holds the ids, so attribute columns are 1..attributes.size() inclusive; ext::range is half-open */ { - const auto & id_column = *block.getByPosition(0).column; + const auto & attribute_column = *block.getByPosition(attribute_idx).column; + auto & attribute = attributes[dict_struct.attributes[attribute_idx - 1].name]; - for (const auto attribute_idx : ext::range(1, attributes.size())) - { - const auto & attribute_column = *block.getByPosition(attribute_idx).column; - auto & attribute = attributes[dict_struct.attributes[attribute_idx - 1].name]; - - for (const auto row_idx : ext::range(0, id_column.size())) - setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); - } + for (const auto row_idx : ext::range(0, id_column.size())) + setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); } } + + /// @todo wrap source_ptr so that it reset buffer automatically + this->source_ptr->reset(); } +private: UInt64 getUInt64(const id_t id, const std::string & attribute_name) const override { const auto & attribute = findAttribute(attribute_name); @@ -74,7 +67,7 @@ public: return attribute.uint64_null_value; } - StringRef getString(const id_t id, const std::string & attribute_name) const override + StringRef getString(const id_t id, const std::string & attribute_name) const override { const auto & attribute = findAttribute(attribute_name); @@ -85,12 +78,13 @@ public: }; - if (id < max_array_size) - return { attribute.string_array[id].data(), attribute.string_array[id].size() }; + if (id < attribute.string_array.size()) /* string_array starts at initial_array_size and grows on demand; ids past its current end fall through to the null value */ + return attribute.string_array[id]; return { attribute.string_null_value.data(), attribute.string_null_value.size() }; } -private: + bool isComplete() const override { return true; } + enum class attribute_type { uint8, @@ -124,7 +118,8 @@ private: std::unique_ptr int16_array; std::unique_ptr int32_array; std::unique_ptr int64_array; - std::unique_ptr string_array; + 
std::unique_ptr string_arena; + std::vector string_array; }; using attributes_t = std::map; @@ -177,8 +172,10 @@ private: break; case attribute_type::string: attr.string_null_value = null_value; - attr.string_array.reset(new String[max_array_size]); - std::fill(attr.string_array.get(), attr.string_array.get() + max_array_size, attr.string_null_value); + attr.string_arena.reset(new Arena); + attr.string_array.resize(initial_array_size, StringRef{ + attr.string_null_value.data(), attr.string_null_value.size() + }); break; } @@ -227,29 +224,23 @@ private: case attribute_type::int16: attribute.int16_array[id] = value.get(); break; case attribute_type::int32: attribute.int32_array[id] = value.get(); break; case attribute_type::int64: attribute.int64_array[id] = value.get(); break; - case attribute_type::string: attribute.string_array[id] = value.get(); break; - } - } + case attribute_type::string: + { + const auto & string = value.get(); + const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - static Block createSampleBlock(const DictionaryStructure & dict_struct, const Context & context) - { - Block block{ - ColumnWithNameAndType{ - new ColumnUInt64, - new DataTypeUInt64, - dict_struct.id_name + const auto current_size = attribute.string_array.size(); + if (id >= current_size) + attribute.string_array.resize( + std::min(max_array_size, 2 * current_size > id ? 
2 * current_size : 2 * id), + StringRef{ + attribute.string_null_value.data(), attribute.string_null_value.size() + }); + + attribute.string_array[id] = StringRef{string_in_arena, string.size()}; + break; } - }; - - for (const auto & attribute : dict_struct.attributes) - { - const auto & type = context.getDataTypeFactory().get(attribute.type); - block.insert(ColumnWithNameAndType{ - type->createColumn(), type, attribute.name - }); } - - return block; } const attribute_t & findAttribute(const std::string & attribute_name) const @@ -266,6 +257,8 @@ private: attributes_t attributes; const attribute_t * hierarchical_attribute = nullptr; + + DictionarySourcePtr source_ptr; }; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index c8611e23c23..facae765e05 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -15,75 +15,12 @@ public: virtual UInt64 getUInt64(const id_t id, const std::string & attribute_name) const = 0; virtual StringRef getString(const id_t id, const std::string & attribute_name) const = 0; + virtual bool isComplete() const = 0; + virtual void reload() {} + virtual ~IDictionary() = default; }; using DictionaryPtr = std::unique_ptr; -struct DictionaryAttribute -{ - std::string name; - std::string type; - std::string null_value; - bool hierarchical; - bool injective; -}; - -struct DictionaryStructure -{ - std::string id_name; - std::vector attributes; - - static DictionaryStructure fromXML(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix) - { - const auto & id_name = config.getString(config_prefix + ".id.name"); - if (id_name.empty()) - throw Exception{ - "No 'id' specified for dictionary", - ErrorCodes::BAD_ARGUMENTS - }; - - DictionaryStructure result{id_name}; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - auto has_hierarchy = false; - for (const auto & key : keys) - { - if (0 
!= strncmp(key.data(), "attribute", strlen("attribute"))) - continue; - - const auto & prefix = config_prefix + '.' + key + '.'; - const auto & name = config.getString(prefix + "name"); - const auto & type = config.getString(prefix + "type"); - const auto & null_value = config.getString(prefix + "null_value"); - const auto hierarchical = config.getBool(prefix + "hierarchical", false); - const auto injective = config.getBool(prefix + "injective", false); - if (name.empty() || type.empty()) - throw Exception{ - "Properties 'name' and 'type' of an attribute cannot be empty", - ErrorCodes::BAD_ARGUMENTS - }; - - if (has_hierarchy && hierarchical) - throw Exception{ - "Only one hierarchical attribute supported", - ErrorCodes::BAD_ARGUMENTS - }; - - has_hierarchy = has_hierarchy || hierarchical; - - result.attributes.emplace_back(DictionaryAttribute{name, type, null_value, hierarchical, injective}); - } - - if (result.attributes.empty()) - throw Exception{ - "Dictionary has no attributes defined", - ErrorCodes::BAD_ARGUMENTS - }; - - return result; - } -}; - } diff --git a/dbms/include/DB/Dictionaries/IDictionarySource.h b/dbms/include/DB/Dictionaries/IDictionarySource.h new file mode 100644 index 00000000000..6efc8619754 --- /dev/null +++ b/dbms/include/DB/Dictionaries/IDictionarySource.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class IDictionarySource +{ +public: + virtual BlockInputStreamPtr loadAll() = 0; + virtual BlockInputStreamPtr loadId(const std::uint64_t id) = 0; + virtual BlockInputStreamPtr loadIds(const std::vector ids) = 0; + + virtual void reset() {} + + virtual ~IDictionarySource() = default; +}; + +using DictionarySourcePtr = std::unique_ptr; + +} diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index b3d3d016504..41d59eb2f1d 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -1,5 +1,6 @@ #pragma once 
+#include #include #include @@ -38,10 +39,14 @@ private: int reload_period; std::thread reloading_thread; + std::thread reloading_externals_thread; Poco::Event destroy; Logger * log; + std::mutex externals_mutex; + Poco::Timestamp dictionaries_last_modified{0}; + void handleException() const { @@ -123,23 +128,12 @@ private: was_exception = true; } - try - { - reloadExternalDictionaries(); - } - catch (...) - { - handleException(); - was_exception = true; - } - - if (!was_exception) LOG_INFO(log, "Loaded dictionaries."); } - void reloadExternalDictionaries(); + void reloadExternals(); /// Обновляет каждые reload_period секунд. void reloadPeriodically() @@ -153,6 +147,18 @@ private: } } + void reloadExternalsPeriodically() + { + const auto check_period = 1000; + while (true) + { + if (destroy.tryWait(check_period)) + return; + + reloadExternals(); + } + } + public: /// Справочники будут обновляться в отдельном потоке, каждые reload_period секунд. Dictionaries(const Context & context, int reload_period_ = 3600) @@ -160,13 +166,16 @@ public: log(&Logger::get("Dictionaries")) { reloadImpl(); + reloadExternals(); reloading_thread = std::thread([this] { reloadPeriodically(); }); + reloading_externals_thread = std::thread{&Dictionaries::reloadExternalsPeriodically, this}; } ~Dictionaries() { destroy.set(); reloading_thread.join(); + reloading_externals_thread.join(); } MultiVersion::Version getRegionsHierarchies() const @@ -191,6 +200,13 @@ public: MultiVersion::Version getExternalDictionary(const std::string & name) const { + std::cout << "there are dictionaries: "; + std::transform(std::begin(external_dictionaries), std::end(external_dictionaries), + std::ostream_iterator{std::cout, ", "}, + [] (const std::pair>> & pair) { + return pair.first; + }); + std::cout << std::endl; const auto it = external_dictionaries.find(name); if (it == std::end(external_dictionaries)) throw Exception{ diff --git a/dbms/src/Interpreters/Dictionaries.cpp 
b/dbms/src/Interpreters/Dictionaries.cpp index 487704ed7b4..f99f487b62c 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -15,50 +15,92 @@ namespace template using config_ptr_t = std::unique_ptr>; }; -void Dictionaries::reloadExternalDictionaries() +void Dictionaries::reloadExternals() { + const std::lock_guard lock{externals_mutex}; + const auto config_path = Poco::Util::Application::instance().config().getString("dictionaries_config"); if (config_path.empty()) return; - const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; - - /// get all dictionaries' definitions - Poco::Util::AbstractConfiguration::Keys keys; - config->keys(keys); - - /// for each dictionary defined in xml config - for (const auto & key : keys) + const auto last_modified = Poco::File{config_path}.getLastModified(); + if (last_modified > dictionaries_last_modified) { - if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) - { - LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); - continue; - } + dictionaries_last_modified = last_modified; - const auto & prefix = key + '.'; + const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; - const auto & name = config->getString(prefix + "name"); - if (name.empty()) - { - LOG_WARNING(log, "dictionary name cannot be empty"); - continue; - } + /// get all dictionaries' definitions + Poco::Util::AbstractConfiguration::Keys keys; + config->keys(keys); - try + /// for each dictionary defined in xml config + for (const auto & key : keys) { - auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); - const auto it = external_dictionaries.find(name); - if (it == std::end(external_dictionaries)) - external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); - else - it->second->set(dict_ptr.release()); - } - catch (const Exception &) - { - handleException(); + if (0 != strncmp(key.data(), 
"dictionary", strlen("dictionary"))) + { + LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); + continue; + } + + const auto & prefix = key + '.'; + + const auto & name = config->getString(prefix + "name"); + if (name.empty()) + { + LOG_WARNING(log, "dictionary name cannot be empty"); + continue; + } + + try + { + auto it = external_dictionaries.find(name); + if (it == std::end(external_dictionaries)) + { + auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); + external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + } + else + { + auto & current = it->second->get(); + if (current->isComplete()) + { + /// @todo check that timeout has passed + auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); + it->second->set(dict_ptr.release()); + } + else + const_cast(current.get())->reload(); + } + } + catch (const Exception &) + { + handleException(); + } } } -}; + else + { + for (auto & dictionary : external_dictionaries) + { + try + { + auto & current = dictionary.second->get(); + if (current->isComplete()) + { + /// @todo check that timeout has passed and load new version + } + else + { + const_cast(current.get())->reload(); + } + } + catch (const Exception &) + { + handleException(); + } + } + } +} } diff --git a/libs/libmysqlxx/include/mysqlxx/Pool.h b/libs/libmysqlxx/include/mysqlxx/Pool.h index b3821215274..03957b47f2d 100644 --- a/libs/libmysqlxx/include/mysqlxx/Pool.h +++ b/libs/libmysqlxx/include/mysqlxx/Pool.h @@ -197,14 +197,30 @@ public: * @param max_connections_ Максимальное количество подключений */ Pool(const std::string & config_name, - unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, - unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, - const char * parent_config_name_ = nullptr) + unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, + unsigned max_connections_ = 
MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, + const char * parent_config_name_ = nullptr) + : Pool{ + Poco::Util::Application::instance().config(), config_name, + default_connections_, max_connections_, parent_config_name_ + } + {} + + + /** + * @param cfg Конфигурация + * @param config_name Имя параметра в конфигурационном файле + * @param default_connections_ Количество подключений по-умолчанию + * @param max_connections_ Максимальное количество подключений + */ + Pool(Poco::Util::LayeredConfiguration & cfg, + const std::string & config_name, + unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, + unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, + const char * parent_config_name_ = nullptr) : default_connections(default_connections_), max_connections(max_connections_), initialized(false), was_successful(false) { - Poco::Util::LayeredConfiguration & cfg = Poco::Util::Application::instance().config(); - server = cfg.getString(config_name + ".host"); if (parent_config_name_) From 1b4b0d5779bd08b2aa7cab37d8a876b864fed6e2 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 26 Jan 2015 19:53:44 +0300 Subject: [PATCH 06/43] dbms: MysqlDictionarySource with draft MysqlBockInputStream --- .../DB/Dictionaries/DictionaryFactory.h | 4 +- .../DB/Dictionaries/DictionarySourceFactory.h | 12 +- .../DB/Dictionaries/DictionaryStructure.h | 4 +- .../DB/Dictionaries/FileDictionarySource.h | 4 +- dbms/include/DB/Dictionaries/FlatDictionary.h | 4 +- .../DB/Dictionaries/MysqlDictionarySource.h | 112 ++++++++++++++++++ dbms/include/DB/Dictionaries/config_ptr_t.h | 15 +++ dbms/include/DB/Interpreters/Dictionaries.h | 7 -- dbms/src/Interpreters/Dictionaries.cpp | 11 +- 9 files changed, 140 insertions(+), 33 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/MysqlDictionarySource.h create mode 100644 dbms/include/DB/Dictionaries/config_ptr_t.h diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h 
b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 1b531cdc332..6a5836ab320 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -13,10 +13,10 @@ namespace DB class DictionaryFactory : public Singleton { public: - DictionaryPtr create(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix, + DictionaryPtr create(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Context & context) const { - auto dict_struct = DictionaryStructure::fromXML(config, config_prefix + "structure"); + auto dict_struct = DictionaryStructure::fromConfig(config, config_prefix + "structure"); auto source_ptr = DictionarySourceFactory::instance().create( config, config_prefix + "source.", dict_struct, context); diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index 91073abc964..63ccadcc005 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,7 @@ Block createSampleBlock(const DictionaryStructure & dict_struct, const Context & class DictionarySourceFactory : public Singleton { public: - DictionarySourcePtr create(const Poco::Util::AbstractConfiguration & config, + DictionarySourcePtr create(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct, const Context & context) const @@ -54,15 +55,10 @@ public: } else if (config.has(config_prefix + "mysql")) { - throw Exception{ - "source.mysql not yet implemented", - ErrorCodes::NOT_IMPLEMENTED - }; + return ext::make_unique(config, config_prefix + "mysql.", sample_block, context); } - throw Exception{ - "unsupported source type" - }; + throw Exception{"unsupported source type"}; } }; diff --git 
a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h index 9720f5d87d8..cf7b68b644a 100644 --- a/dbms/include/DB/Dictionaries/DictionaryStructure.h +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -22,7 +22,7 @@ struct DictionaryStructure std::string id_name; std::vector attributes; - static DictionaryStructure fromXML(const Poco::Util::XMLConfiguration & config, const std::string & config_prefix) + static DictionaryStructure fromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) { const auto & id_name = config.getString(config_prefix + ".id.name"); if (id_name.empty()) diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 0218a663979..0502fbab4f1 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -7,10 +7,10 @@ namespace DB { -const auto max_block_size = 8192; - class FileDictionarySource final : public IDictionarySource { + static const auto max_block_size = 8192; + public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, const Context & context) diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 9e3a8cf8d08..221414cf904 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -17,7 +17,7 @@ const auto max_array_size = 500000; class FlatDictionary final : public IDictionary { public: - FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::XMLConfiguration & config, + FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, 
const std::string & config_prefix, DictionarySourcePtr source_ptr) : source_ptr{std::move(source_ptr)} { diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h new file mode 100644 index 00000000000..f2448decc55 --- /dev/null +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class MysqlBlockInputStream final : public IProfilingBlockInputStream +{ +public: + MysqlBlockInputStream(mysqlxx::Query query, const Block & sample_block, const std::size_t max_block_size) + : query{std::move(query)}, result{query.use()}, sample_block{sample_block}, max_block_size{max_block_size} + { + } + + String getName() const override { return "MysqlBlockInputStream"; } + + String getID() const override + { + return "Mysql(" + query.str() + ")"; + } + +private: + Block readImpl() override + { + auto block = sample_block.cloneEmpty(); + + std::size_t rows = 0; + while (auto row = result.fetch()) + { + for (const auto idx : ext::range(0, row.size())) + /// @todo type switch to get the real value from row[idx] + block.getByPosition(idx).column->insert(Field{}); + + ++rows; + if (rows == max_block_size) + break; + } + + return block; + } + + mysqlxx::Query query; + mysqlxx::UseQueryResult result; + Block sample_block; + std::size_t max_block_size; +}; + +class MysqlDictionarySource final : public IDictionarySource +{ + static const auto max_block_size = 8192; + +public: + MysqlDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + Block & sample_block, const Context & context) + : layered_config_ptr{getLayeredConfig(config)}, + pool{*layered_config_ptr, config_prefix}, + sample_block{sample_block}, context(context) {} + +private: + BlockInputStreamPtr loadAll() override + { + auto connection = pool.Get(); + auto query = 
connection->query("SELECT 1+1;"); + auto result = query.use(); + while (auto row = result.fetch()) + { + for (const auto idx : ext::range(0, row.size())) + std::cout << row[idx].getString() << ' '; + std::cout << std::endl; + } + return new MysqlBlockInputStream{pool.Get()->query(""), sample_block, max_block_size}; + } + + BlockInputStreamPtr loadId(const std::uint64_t id) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + BlockInputStreamPtr loadIds(const std::vector ids) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + static config_ptr_t getLayeredConfig(Poco::Util::AbstractConfiguration & config) + { + config_ptr_t layered_config{new Poco::Util::LayeredConfiguration}; + layered_config->add(&config); + return layered_config; + } + + const config_ptr_t layered_config_ptr; + mysqlxx::Pool pool; + Block sample_block; + const Context & context; +}; + +} diff --git a/dbms/include/DB/Dictionaries/config_ptr_t.h b/dbms/include/DB/Dictionaries/config_ptr_t.h new file mode 100644 index 00000000000..f5f29792cc7 --- /dev/null +++ b/dbms/include/DB/Dictionaries/config_ptr_t.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +template struct release +{ + void operator()(const T * const ptr) { ptr->release(); } +}; + +template using config_ptr_t = std::unique_ptr>; + +} diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 41d59eb2f1d..7ad53a4844e 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -200,13 +200,6 @@ public: MultiVersion::Version getExternalDictionary(const std::string & name) const { - std::cout << "there are dictionaries: "; - std::transform(std::begin(external_dictionaries), std::end(external_dictionaries), - std::ostream_iterator{std::cout, ", "}, - [] (const std::pair>> & pair) { - return pair.first; - }); - std::cout << std::endl; 
const auto it = external_dictionaries.find(name); if (it == std::end(external_dictionaries)) throw Exception{ diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index f99f487b62c..93b46c6ffe5 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -1,20 +1,11 @@ #include #include -#include +#include namespace DB { -namespace -{ - template struct release - { - void operator()(const T * const ptr) { ptr->release(); } - }; - template using config_ptr_t = std::unique_ptr>; -}; - void Dictionaries::reloadExternals() { const std::lock_guard lock{externals_mutex}; From 6b406df4d9d826a1505cb95d5023e81ee0362bc6 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 27 Jan 2015 16:00:20 +0300 Subject: [PATCH 07/43] dbms: implement MysqlBlockInputStream. [#METR-13298] --- .../DB/Dictionaries/DictionaryStructure.h | 42 +++++++ dbms/include/DB/Dictionaries/FlatDictionary.h | 60 +++------ .../DB/Dictionaries/MysqlBlockInputStream.h | 114 ++++++++++++++++++ .../DB/Dictionaries/MysqlDictionarySource.h | 80 +++++------- 4 files changed, 202 insertions(+), 94 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/MysqlBlockInputStream.h diff --git a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h index cf7b68b644a..c23468c3551 100644 --- a/dbms/include/DB/Dictionaries/DictionaryStructure.h +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -4,10 +4,52 @@ #include #include #include +#include namespace DB { +enum class attribute_type +{ + uint8, + uint16, + uint32, + uint64, + int8, + int16, + int32, + int64, + float32, + float64, + string +}; + +attribute_type getAttributeTypeByName(const std::string & type) +{ + static const std::unordered_map dictionary{ + { "UInt8", attribute_type::uint8 }, + { "UInt16", attribute_type::uint16 }, + { "UInt32", attribute_type::uint32 }, + { "UInt64", attribute_type::uint64 }, + { "Int8", 
attribute_type::int8 }, + { "Int16", attribute_type::int16 }, + { "Int32", attribute_type::int32 }, + { "Int64", attribute_type::int64 }, + { "Float32", attribute_type::float32 }, + { "Float64", attribute_type::float64 }, + { "String", attribute_type::string }, + }; + + const auto it = dictionary.find(type); + if (it != std::end(dictionary)) + return it->second; + + throw Exception{ + "Unknown type " + type, + ErrorCodes::UNKNOWN_TYPE + }; +} + struct DictionaryAttribute { std::string name; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 221414cf904..7a671d471f7 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -4,7 +4,6 @@ #include #include #include -#include #include namespace DB @@ -36,10 +35,10 @@ public: { const auto & id_column = *block.getByPosition(0).column; - for (const auto attribute_idx : ext::range(1, attributes.size())) + for (const auto attribute_idx : ext::range(0, attributes.size())) { - const auto & attribute_column = *block.getByPosition(attribute_idx).column; - auto & attribute = attributes[dict_struct.attributes[attribute_idx - 1].name]; + const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + auto & attribute = attributes[dict_struct.attributes[attribute_idx].name]; for (const auto row_idx : ext::range(0, id_column.size())) setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); @@ -85,19 +84,6 @@ private: bool isComplete() const override { return true; } - enum class attribute_type - { - uint8, - uint16, - uint32, - uint64, - int8, - int16, - int32, - int64, - string - }; - struct attribute_t { attribute_type type; @@ -109,6 +95,8 @@ private: Int16 int16_null_value; Int32 int32_null_value; Int64 int64_null_value; + Float32 float32_null_value; + Float64 float64_null_value; String string_null_value; std::unique_ptr uint8_array; std::unique_ptr uint16_array; @@ 
-118,6 +106,8 @@ private: std::unique_ptr int16_array; std::unique_ptr int32_array; std::unique_ptr int64_array; + std::unique_ptr float32_array; + std::unique_ptr float64_array; std::unique_ptr string_arena; std::vector string_array; }; @@ -170,6 +160,16 @@ private: attr.int64_array.reset(new Int64[max_array_size]); std::fill(attr.int64_array.get(), attr.int64_array.get() + max_array_size, attr.int64_null_value); break; + case attribute_type::float32: + attr.float32_null_value = DB::parse(null_value); + attr.float32_array.reset(new Float32[max_array_size]); + std::fill(attr.float32_array.get(), attr.float32_array.get() + max_array_size, attr.float32_null_value); + break; + case attribute_type::float64: + attr.float64_null_value = DB::parse(null_value); + attr.float64_array.reset(new Float64[max_array_size]); + std::fill(attr.float64_array.get(), attr.float64_array.get() + max_array_size, attr.float64_null_value); + break; case attribute_type::string: attr.string_null_value = null_value; attr.string_arena.reset(new Arena); @@ -182,30 +182,6 @@ private: return attr; } - attribute_type getAttributeTypeByName(const std::string & type) - { - static const std::unordered_map dictionary{ - { "UInt8", attribute_type::uint8 }, - { "UInt16", attribute_type::uint16 }, - { "UInt32", attribute_type::uint32 }, - { "UInt64", attribute_type::uint64 }, - { "Int8", attribute_type::int8 }, - { "Int16", attribute_type::int16 }, - { "Int32", attribute_type::int32 }, - { "Int64", attribute_type::int64 }, - { "String", attribute_type::string }, - }; - - const auto it = dictionary.find(type); - if (it != std::end(dictionary)) - return it->second; - - throw Exception{ - "Unknown type " + type, - ErrorCodes::UNKNOWN_TYPE - }; - } - void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value) { if (id >= max_array_size) @@ -224,6 +200,8 @@ private: case attribute_type::int16: attribute.int16_array[id] = value.get(); break; case attribute_type::int32: 
attribute.int32_array[id] = value.get(); break; case attribute_type::int64: attribute.int64_array[id] = value.get(); break; + case attribute_type::float32: attribute.float32_array[id] = value.get(); break; + case attribute_type::float64: attribute.float64_array[id] = value.get(); break; case attribute_type::string: { const auto & string = value.get(); diff --git a/dbms/include/DB/Dictionaries/MysqlBlockInputStream.h b/dbms/include/DB/Dictionaries/MysqlBlockInputStream.h new file mode 100644 index 00000000000..120a02ac47f --- /dev/null +++ b/dbms/include/DB/Dictionaries/MysqlBlockInputStream.h @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class MysqlBlockInputStream final : public IProfilingBlockInputStream +{ +public: + MysqlBlockInputStream(mysqlxx::Query query, const Block & sample_block, const std::size_t max_block_size) + : query{std::move(query)}, result{query.use()}, sample_block{sample_block}, max_block_size{max_block_size} + { + types.reserve(sample_block.columns()); + + for (const auto idx : ext::range(0, sample_block.columns())) + { + const auto type = sample_block.getByPosition(idx).type.get(); + if (typeid_cast(type)) + types.push_back(attribute_type::uint8); + else if (typeid_cast(type)) + types.push_back(attribute_type::uint16); + else if (typeid_cast(type)) + types.push_back(attribute_type::uint32); + else if (typeid_cast(type)) + types.push_back(attribute_type::uint64); + else if (typeid_cast(type)) + types.push_back(attribute_type::int8); + else if (typeid_cast(type)) + types.push_back(attribute_type::int16); + else if (typeid_cast(type)) + types.push_back(attribute_type::int32); + else if (typeid_cast(type)) + types.push_back(attribute_type::int64); + else if (typeid_cast(type)) + types.push_back(attribute_type::float32); + else if (typeid_cast(type)) + types.push_back(attribute_type::float64); + else if (typeid_cast(type)) + 
types.push_back(attribute_type::string); + else + throw Exception{ + "Unsupported type " + type->getName(), + ErrorCodes::UNKNOWN_TYPE + }; + } + } + + String getName() const override { return "MysqlBlockInputStream"; } + + String getID() const override + { + return "Mysql(" + query.str() + ")"; + } + +private: + Block readImpl() override + { + auto block = sample_block.cloneEmpty(); + + if (block.columns() != result.getNumFields()) + throw Exception{ + "mysqlxx::UserQueryResult contains " + toString(result.getNumFields()) + " columns while " + + toString(block.columns()) + " expected", + ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH + }; + + std::size_t rows = 0; + while (auto row = result.fetch()) + { + for (const auto idx : ext::range(0, row.size())) + insertValue(block.getByPosition(idx).column, row[idx], types[idx]); + + ++rows; + if (rows == max_block_size) + break; + } + + return rows == 0 ? Block{} : block; + }; + + static void insertValue(ColumnPtr & column, const mysqlxx::Value & value, const attribute_type type) + { + switch (type) + { + case attribute_type::uint8: column->insert(static_cast(value)); break; + case attribute_type::uint16: column->insert(static_cast(value)); break; + case attribute_type::uint32: column->insert(static_cast(value)); break; + case attribute_type::uint64: column->insert(static_cast(value)); break; + case attribute_type::int8: column->insert(static_cast(value)); break; + case attribute_type::int16: column->insert(static_cast(value)); break; + case attribute_type::int32: column->insert(static_cast(value)); break; + case attribute_type::int64: column->insert(static_cast(value)); break; + case attribute_type::float32: column->insert(static_cast(value)); break; + case attribute_type::float64: column->insert(static_cast(value)); break; + case attribute_type::string: column->insert(value.getString()); break; + } + } + + mysqlxx::Query query; + mysqlxx::UseQueryResult result; + Block sample_block; + std::size_t max_block_size; + 
std::vector types; +}; + +} diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index f2448decc55..2a340ccc6c1 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -1,10 +1,10 @@ #pragma once #include +#include #include #include #include -#include #include #include #include @@ -13,47 +13,6 @@ namespace DB { -class MysqlBlockInputStream final : public IProfilingBlockInputStream -{ -public: - MysqlBlockInputStream(mysqlxx::Query query, const Block & sample_block, const std::size_t max_block_size) - : query{std::move(query)}, result{query.use()}, sample_block{sample_block}, max_block_size{max_block_size} - { - } - - String getName() const override { return "MysqlBlockInputStream"; } - - String getID() const override - { - return "Mysql(" + query.str() + ")"; - } - -private: - Block readImpl() override - { - auto block = sample_block.cloneEmpty(); - - std::size_t rows = 0; - while (auto row = result.fetch()) - { - for (const auto idx : ext::range(0, row.size())) - /// @todo type switch to get the real value from row[idx] - block.getByPosition(idx).column->insert(Field{}); - - ++rows; - if (rows == max_block_size) - break; - } - - return block; - } - - mysqlxx::Query query; - mysqlxx::UseQueryResult result; - Block sample_block; - std::size_t max_block_size; -}; - class MysqlDictionarySource final : public IDictionarySource { static const auto max_block_size = 8192; @@ -63,21 +22,15 @@ public: Block & sample_block, const Context & context) : layered_config_ptr{getLayeredConfig(config)}, pool{*layered_config_ptr, config_prefix}, - sample_block{sample_block}, context(context) {} + sample_block{sample_block}, context(context), + table{config.getString(config_prefix + "table")}, + load_all_query{composeLoadAllQuery(sample_block, table)} + {} private: BlockInputStreamPtr loadAll() override { - auto connection = pool.Get(); - auto 
query = connection->query("SELECT 1+1;"); - auto result = query.use(); - while (auto row = result.fetch()) - { - for (const auto idx : ext::range(0, row.size())) - std::cout << row[idx].getString() << ' '; - std::cout << std::endl; - } - return new MysqlBlockInputStream{pool.Get()->query(""), sample_block, max_block_size}; + return new MysqlBlockInputStream{pool.Get()->query(load_all_query), sample_block, max_block_size}; } BlockInputStreamPtr loadId(const std::uint64_t id) override @@ -103,10 +56,31 @@ private: return layered_config; } + static std::string composeLoadAllQuery(const Block & block, const std::string & table) + { + std::string query{"SELECT "}; + + auto first = true; + for (const auto idx : ext::range(0, block.columns())) + { + if (!first) + query += ", "; + + query += block.getByPosition(idx).name; + first = false; + } + + query += " FROM " + table + ';'; + + return query; + } + const config_ptr_t layered_config_ptr; mysqlxx::Pool pool; Block sample_block; const Context & context; + const std::string table; + const std::string load_all_query; }; } From f4e7d2d4f5970f72d562469d9e932fc8e82b213b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 28 Jan 2015 16:20:20 +0300 Subject: [PATCH 08/43] dbms: add dictGetHierarchy and dictIsIn. 
[#METR-13298] --- .../DB/Dictionaries/DictionaryStructure.h | 23 ++ dbms/include/DB/Dictionaries/FlatDictionary.h | 106 +++-- dbms/include/DB/Dictionaries/IDictionary.h | 32 +- .../DB/Functions/FunctionsDictionaries.h | 389 +++++++++++++++++- dbms/src/Functions/FunctionsDictionaries.cpp | 2 + dbms/src/Interpreters/Context.cpp | 2 +- 6 files changed, 518 insertions(+), 36 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h index c23468c3551..47e445f5e31 100644 --- a/dbms/include/DB/Dictionaries/DictionaryStructure.h +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -50,6 +50,29 @@ attribute_type getAttributeTypeByName(const std::string & type) }; } +std::string toString(const attribute_type type) +{ + switch (type) + { + case attribute_type::uint8: return "UInt8"; + case attribute_type::uint16: return "UInt16"; + case attribute_type::uint32: return "UInt32"; + case attribute_type::uint64: return "UInt64"; + case attribute_type::int8: return "Int8"; + case attribute_type::int16: return "Int16"; + case attribute_type::int32: return "Int32"; + case attribute_type::int64: return "Int64"; + case attribute_type::float32: return "Float32"; + case attribute_type::float64: return "Float64"; + case attribute_type::string: return "String"; + } + + throw Exception{ + "Unknown attribute_type " + toString(type), + ErrorCodes::ARGUMENT_OUT_OF_BOUND + }; +} + struct DictionaryAttribute { std::string name; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 7a671d471f7..f846a92f019 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -20,13 +20,16 @@ public: const std::string & config_prefix, DictionarySourcePtr source_ptr) : source_ptr{std::move(source_ptr)} { + attributes.reserve(dict_struct.attributes.size()); for (const auto & attribute : dict_struct.attributes) { - 
attributes.emplace(attribute.name, - createAttributeWithType(getAttributeTypeByName(attribute.type), attribute.null_value)); + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.emplace_back( + createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value)); if (attribute.hierarchical) - hierarchical_attribute = &attributes[attribute.name]; + hierarchical_attribute = &attributes.back(); } auto stream = this->source_ptr->loadAll(); @@ -38,7 +41,7 @@ public: for (const auto attribute_idx : ext::range(0, attributes.size())) { const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; - auto & attribute = attributes[dict_struct.attributes[attribute_idx].name]; + auto & attribute = attributes[attribute_idx]; for (const auto row_idx : ext::range(0, id_column.size())) setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); @@ -49,14 +52,41 @@ public: this->source_ptr->reset(); } -private: - UInt64 getUInt64(const id_t id, const std::string & attribute_name) const override + id_t toParent(const id_t id) const override { - const auto & attribute = findAttribute(attribute_name); + const auto exists = id < max_array_size; + const auto attr = hierarchical_attribute; + + switch (hierarchical_attribute->type) + { + case attribute_type::uint8: return exists ? attr->uint8_array[id] : attr->uint8_null_value; + case attribute_type::uint16: return exists ? attr->uint16_array[id] : attr->uint16_null_value; + case attribute_type::uint32: return exists ? attr->uint32_array[id] : attr->uint32_null_value; + case attribute_type::uint64: return exists ? attr->uint64_array[id] : attr->uint64_null_value; + case attribute_type::int8: return exists ? attr->int8_array[id] : attr->int8_null_value; + case attribute_type::int16: return exists ? attr->int16_array[id] : attr->int16_null_value; + case attribute_type::int32: return exists ? 
attr->int32_array[id] : attr->int32_null_value; + case attribute_type::int64: return exists ? attr->int64_array[id] : attr->int64_null_value; + case attribute_type::float32: + case attribute_type::float64: + case attribute_type::string: + break; + } + + throw Exception{ + "Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), + ErrorCodes::TYPE_MISMATCH + }; + } + + UInt64 getUInt64(const std::string & attribute_name, const id_t id) const override + { + const auto idx = getAttributeIndex(attribute_name); + const auto & attribute = attributes[idx]; if (attribute.type != attribute_type::uint64) throw Exception{ - "Type mismatch: attribute " + attribute_name + " has a type different from UInt64", + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; @@ -66,13 +96,14 @@ private: return attribute.uint64_null_value; } - StringRef getString(const id_t id, const std::string & attribute_name) const override + StringRef getString(const std::string & attribute_name, const id_t id) const override { - const auto & attribute = findAttribute(attribute_name); + const auto idx = getAttributeIndex(attribute_name); + const auto & attribute = attributes[idx]; if (attribute.type != attribute_type::string) throw Exception{ - "Type mismatch: attribute " + attribute_name + " has a type different from String", + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; @@ -82,8 +113,44 @@ private: return { attribute.string_null_value.data(), attribute.string_null_value.size() }; } + std::size_t getAttributeIndex(const std::string & attribute_name) const override + { + const auto it = attribute_index_by_name.find(attribute_name); + if (it == std::end(attribute_index_by_name)) + throw Exception{ + "No such attribute '" + attribute_name + "'", + ErrorCodes::BAD_ARGUMENTS + }; + + return it->second; + } + + bool isUInt64(const std::size_t 
attribute_idx) const override + { + return attributes[attribute_idx].type == attribute_type::uint64; + } + + bool isString(const std::size_t attribute_idx) const override + { + return attributes[attribute_idx].type == attribute_type::string; + } + + UInt64 getUInt64Unsafe(const std::size_t attribute_idx, const id_t id) const override + { + const auto & attribute = attributes[attribute_idx]; + return id < max_array_size ? attribute.uint64_array[id] : attribute.uint64_null_value; + } + + StringRef getStringUnsafe(const std::size_t attribute_idx, const id_t id) const override + { + const auto & attribute = attributes[attribute_idx]; + return id < max_array_size ? attribute.string_array[id] : attribute.string_null_value; + } + bool isComplete() const override { return true; } + bool hasHierarchy() const override { return hierarchical_attribute; } + struct attribute_t { attribute_type type; @@ -112,8 +179,6 @@ private: std::vector string_array; }; - using attributes_t = std::map; - attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) { attribute_t attr{type}; @@ -221,19 +286,8 @@ private: } } - const attribute_t & findAttribute(const std::string & attribute_name) const - { - const auto it = attributes.find(attribute_name); - if (it == std::end(attributes)) - throw Exception{ - "No such attribute '" + attribute_name + "'", - ErrorCodes::BAD_ARGUMENTS - }; - - return it->second; - } - - attributes_t attributes; + std::map attribute_index_by_name; + std::vector attributes; const attribute_t * hierarchical_attribute = nullptr; DictionarySourcePtr source_ptr; diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index facae765e05..02905a0d5ba 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -12,9 +12,37 @@ class IDictionary public: using id_t = std::uint64_t; - virtual UInt64 getUInt64(const id_t id, const std::string & 
attribute_name) const = 0; - virtual StringRef getString(const id_t id, const std::string & attribute_name) const = 0; + virtual bool hasHierarchy() const = 0; + /// do not call unless you ensure that hasHierarchy() returns true + virtual id_t toParent(id_t id) const = 0; + + bool in(id_t child_id, const id_t ancestor_id) const + { + while (child_id != 0 && child_id != ancestor_id) + child_id = toParent(child_id); + + return child_id != 0; + } + + /// safe and slow functions, perform map lookup and type checks + virtual UInt64 getUInt64(const std::string & attribute_name, id_t id) const = 0; + virtual StringRef getString(const std::string & attribute_name, id_t id) const = 0; + + /// unsafe functions for maximum performance, you are on your own ensuring type-safety + + /// returns persistent attribute index for usage with following functions + virtual std::size_t getAttributeIndex(const std::string & attribute_name) const = 0; + + /// type-checking functions + virtual bool isUInt64(std::size_t attribute_idx) const = 0; + virtual bool isString(std::size_t attribute_idx) const = 0; + + /// plain load from target container without any checks + virtual UInt64 getUInt64Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual StringRef getStringUnsafe(std::size_t attribute_idx, id_t id) const = 0; + + /// entirely-loaded dictionaries should be immutable virtual bool isComplete() const = 0; virtual void reload() {} diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index 2f0559f55b9..1bc62603f3f 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -12,6 +12,7 @@ #include #include +#include namespace DB @@ -884,9 +885,16 @@ private: const auto out = new ColumnString; block.getByPosition(result).column = out; + const auto attribute_idx = dictionary->getAttributeIndex(attr_name); + if (!dictionary->isString(attribute_idx)) + throw 
Exception{ + "Type mismatch: attribute " + attr_name + " has type different from String", + ErrorCodes::TYPE_MISMATCH + }; + for (const auto & id : id_col->getData()) { - const auto string_ref = dictionary->getString(id, attr_name); + const auto string_ref = dictionary->getStringUnsafe(attribute_idx, id); out->insertData(string_ref.data, string_ref.size); } @@ -896,7 +904,7 @@ private: { block.getByPosition(result).column = new ColumnConst{ id_col->size(), - dictionary->getString(id_col->getData(), attr_name).toString() + dictionary->getString(attr_name, id_col->getData()).toString() }; return true; @@ -910,7 +918,7 @@ private: template -class FunctionDictGetInteger: public IFunction +class FunctionDictGetInteger final : public IFunction { public: static const std::string name; @@ -961,7 +969,7 @@ private: !typeid_cast(id_arg)) { throw Exception{ - "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), + "Illegal type " + id_arg->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT }; } @@ -969,7 +977,7 @@ private: return new typename DataTypeFromFieldType::Type; } - void execute(Block & block, const ColumnNumbers & arguments, const size_t result) + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = typeid_cast *>(block.getByPosition(arguments[0]).column.get()); if (!dict_name_col) @@ -1015,8 +1023,15 @@ private: const auto out = new ColumnVector; block.getByPosition(result).column = out; + const auto attribute_idx = dictionary->getAttributeIndex(attr_name); + if (!dictionary->isUInt64(attribute_idx)) + throw Exception{ + "Type mismatch: attribute " + attr_name + " has type different from UInt64", + ErrorCodes::TYPE_MISMATCH + }; + for (const auto & id : id_col->getData()) - out->insert(dictionary->getUInt64(id, attr_name)); + out->insert(dictionary->getUInt64Unsafe(attribute_idx, id)); return true; } @@ -1024,7 +1039,7 @@ private: 
{ block.getByPosition(result).column = new ColumnConst{ id_col->size(), - static_cast(dictionary->getUInt64(id_col->getData(), attr_name)) + static_cast(dictionary->getUInt64(attr_name, id_col->getData())) }; return true; @@ -1050,4 +1065,364 @@ using FunctionDictGetInt32 = FunctionDictGetInteger; using FunctionDictGetInt64 = FunctionDictGetInteger; + +class FunctionDictGetHierarchy final : public IFunction +{ +public: + static constexpr auto name = "dictGetHierarchy"; + + static IFunction * create(const Context & context) + { + return new FunctionDictGetHierarchy{context.getDictionaries()}; + }; + + FunctionDictGetHierarchy(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} + + String getName() const override { return name; } + +private: + DataTypePtr getReturnType(const DataTypes & arguments) const override + { + if (arguments.size() != 2) + throw Exception{ + "Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH + }; + + if (!typeid_cast(arguments[0].get())) + { + throw Exception{ + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + const auto id_arg = arguments[1].get(); + if (!typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg) && + !typeid_cast(id_arg)) + { + throw Exception{ + "Illegal type " + id_arg->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + return new DataTypeArray{new DataTypeUInt64}; + }; + + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) override + { + const auto dict_name_col = typeid_cast *>(block.getByPosition(arguments[0]).column.get()); + if (!dict_name_col) + throw Exception{ + "First argument of function " + 
getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + + if (!dict->hasHierarchy()) + throw Exception{ + "Dictionary does not have a hierarchy", + ErrorCodes::UNSUPPORTED_METHOD + }; + + const auto id_col = block.getByPosition(arguments[1]).column.get(); + if (!execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col) && + !execute(block, result, dict, id_col)) + { + throw Exception{ + "Second argument of function " + getName() + " must be integral", + ErrorCodes::ILLEGAL_COLUMN + }; + } + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const IColumn * const id_col_untyped) + { + if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + const auto backend = new ColumnVector; + const auto array = new ColumnArray{backend}; + block.getByPosition(result).column = array; + + const auto & in = id_col->getData(); + const auto size = in.size(); + auto & out = backend->getData(); + auto & offsets = array->getOffsets(); + offsets.resize(size); + out.reserve(size * 4); + + for (const auto idx : ext::range(0, size)) + { + IDictionary::id_t cur = in[idx]; + while (cur) + { + out.push_back(cur); + cur = dictionary->toParent(cur); + } + offsets[idx] = out.size(); + }; + + return true; + } + else if (const auto id_col = typeid_cast *>(id_col_untyped)) + { + Array res; + + IDictionary::id_t cur = id_col->getData(); + while (cur) + { + res.push_back(static_cast::Type>(cur)); + cur = dictionary->toParent(cur); + } + + block.getByPosition(result).column = new ColumnConstArray{ + id_col->size(), + res, + new DataTypeArray{new DataTypeUInt64} + }; + + return true; + }; + + return false; + } 
+ + const Dictionaries & dictionaries; +}; + + +class FunctionDictIsIn final : public IFunction +{ +public: + static constexpr auto name = "dictIsIn"; + + static IFunction * create(const Context & context) + { + return new FunctionDictIsIn{context.getDictionaries()}; + }; + + FunctionDictIsIn(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} + + String getName() const override { return name; } + +private: + DataTypePtr getReturnType(const DataTypes & arguments) const override + { + if (arguments.size() != 3) + throw Exception{ + "Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 3.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH + }; + + if (!typeid_cast(arguments[0].get())) + { + throw Exception{ + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + const auto child_id_arg = arguments[1].get(); + if (!typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg) && + !typeid_cast(child_id_arg)) + { + throw Exception{ + "Illegal type " + child_id_arg->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + const auto ancestor_id_arg = arguments[2].get(); + if (!typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg) && + !typeid_cast(ancestor_id_arg)) + { + throw Exception{ + "Illegal type " + ancestor_id_arg->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT + }; + } + + return new DataTypeUInt8; + } + + void execute(Block & block, const ColumnNumbers & arguments, const 
size_t result) override + { + const auto dict_name_col = typeid_cast *>(block.getByPosition(arguments[0]).column.get()); + if (!dict_name_col) + throw Exception{ + "First argument of function " + getName() + " must be a constant string", + ErrorCodes::ILLEGAL_COLUMN + }; + + auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + + if (!dict->hasHierarchy()) + throw Exception{ + "Dictionary does not have a hierarchy", + ErrorCodes::UNSUPPORTED_METHOD + }; + + const auto child_id_col = block.getByPosition(arguments[1]).column.get(); + const auto ancestor_id_col = block.getByPosition(arguments[2]).column.get(); + if (!execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col) && + !execute(block, result, dict, child_id_col, ancestor_id_col)) + { + throw Exception{ + "Illegal column " + child_id_col->getName() + + " of second argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN + }; + } + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const IColumn * const child_id_col_untyped, const IColumn * const ancestor_id_col_untyped) + { + if (execute>(block, result, dictionary, child_id_col_untyped, ancestor_id_col_untyped) || + execute>(block, result, dictionary, child_id_col_untyped, ancestor_id_col_untyped)) + return true; + + return false; + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const IColumn * const child_id_col_untyped, const IColumn * const ancestor_id_col_untyped) + { + if (const auto child_id_col = 
typeid_cast(child_id_col_untyped)) + { + if (execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped) || + execute(block, result, dictionary, child_id_col, ancestor_id_col_untyped)) + return true; + else + throw Exception{ + "Illegal column " + ancestor_id_col_untyped->getName() + + " of third argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN + }; + } + + return false; + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const ColumnVector * const child_id_col, const IColumn * const ancestor_id_col_untyped) + { + if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) + { + const auto out = new ColumnVector; + block.getByPosition(result).column = out; + + const auto & child_ids = child_id_col->getData(); + const auto & ancestor_ids = ancestor_id_col->getData(); + auto & data = out->getData(); + const auto size = child_id_col->size(); + data.resize(size); + + for (const auto idx : ext::range(0, size)) + data[idx] = dictionary->in(child_ids[idx], ancestor_ids[idx]); + + return true; + } + else if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) + { + const auto out = new ColumnVector; + block.getByPosition(result).column = out; + + const auto & child_ids = child_id_col->getData(); + const auto ancestor_id = ancestor_id_col->getData(); + auto & data = out->getData(); + const auto size = child_id_col->size(); + data.resize(size); + + for (const auto idx : ext::range(0, size)) 
+ data[idx] = dictionary->in(child_ids[idx], ancestor_id); + + return true; + } + + return false; + } + + template + bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + const ColumnConst * const child_id_col, const IColumn * const ancestor_id_col_untyped) + { + if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) + { + const auto out = new ColumnVector; + block.getByPosition(result).column = out; + + const auto child_id = child_id_col->getData(); + const auto & ancestor_ids = ancestor_id_col->getData(); + auto & data = out->getData(); + const auto size = child_id_col->size(); + data.resize(size); + + for (const auto idx : ext::range(0, size)) + data[idx] = dictionary->in(child_id, ancestor_ids[idx]); + + return true; + } + else if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) + { + block.getByPosition(result).column = new ColumnConst{ + child_id_col->size(), + dictionary->in(child_id_col->getData(), ancestor_id_col->getData()) + }; + + return true; + } + + return false; + } + + const Dictionaries & dictionaries; +}; + + } diff --git a/dbms/src/Functions/FunctionsDictionaries.cpp b/dbms/src/Functions/FunctionsDictionaries.cpp index fa8cf5d5b1d..15536d2c247 100644 --- a/dbms/src/Functions/FunctionsDictionaries.cpp +++ b/dbms/src/Functions/FunctionsDictionaries.cpp @@ -34,6 +34,8 @@ void registerFunctionsDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 1254d94bb81..cdb6f53b287 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -496,7 +496,7 @@ const Dictionaries & Context::getDictionaries() const Poco::ScopedLock lock(shared->mutex); if (!shared->dictionaries) - shared->dictionaries = new Dictionaries{*this}; + 
shared->dictionaries = new Dictionaries{*this->global_context}; return *shared->dictionaries; } From e982b5c92d5d2380997dd97ab87f7399d8099531 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 28 Jan 2015 18:02:05 +0300 Subject: [PATCH 09/43] dbms: add all integral and floating point types to IDictionary. [#METR-13298] --- dbms/include/DB/Dictionaries/FlatDictionary.h | 108 ++++++++++-------- dbms/include/DB/Dictionaries/IDictionary.h | 29 ++++- .../DB/Functions/FunctionsDictionaries.h | 101 ++++++++++------ dbms/src/Functions/FunctionsDictionaries.cpp | 2 + 4 files changed, 156 insertions(+), 84 deletions(-) diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index f846a92f019..8ca411d105d 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -79,39 +79,32 @@ public: }; } - UInt64 getUInt64(const std::string & attribute_name, const id_t id) const override - { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; - - if (attribute.type != attribute_type::uint64) - throw Exception{ - "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), - ErrorCodes::TYPE_MISMATCH - }; - - if (id < max_array_size) - return attribute.uint64_array[id]; - - return attribute.uint64_null_value; + #define DECLARE_SAFE_GETTER(TYPE, NAME, LC_TYPE) \ + TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ + {\ + const auto idx = getAttributeIndex(attribute_name);\ + const auto & attribute = attributes[idx];\ + if (attribute.type != attribute_type::LC_TYPE)\ + throw Exception{\ + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ + ErrorCodes::TYPE_MISMATCH\ + };\ + if (id < max_array_size)\ + return attribute.LC_TYPE##_array[id];\ + return attribute.LC_TYPE##_null_value;\ } - - StringRef getString(const std::string & 
attribute_name, const id_t id) const override - { - const auto idx = getAttributeIndex(attribute_name); - const auto & attribute = attributes[idx]; - - if (attribute.type != attribute_type::string) - throw Exception{ - "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), - ErrorCodes::TYPE_MISMATCH - }; - - if (id < max_array_size) - return attribute.string_array[id]; - - return { attribute.string_null_value.data(), attribute.string_null_value.size() }; - } + DECLARE_SAFE_GETTER(UInt8, UInt8, uint8) + DECLARE_SAFE_GETTER(UInt16, UInt16, uint16) + DECLARE_SAFE_GETTER(UInt32, UInt32, uint32) + DECLARE_SAFE_GETTER(UInt64, UInt64, uint64) + DECLARE_SAFE_GETTER(Int8, Int8, int8) + DECLARE_SAFE_GETTER(Int16, Int16, int16) + DECLARE_SAFE_GETTER(Int32, Int32, int32) + DECLARE_SAFE_GETTER(Int64, Int64, int64) + DECLARE_SAFE_GETTER(Float32, Float32, float32) + DECLARE_SAFE_GETTER(Float64, Float64, float64) + DECLARE_SAFE_GETTER(StringRef, String, string) + #undef DECLARE_SAFE_GETTER std::size_t getAttributeIndex(const std::string & attribute_name) const override { @@ -125,27 +118,42 @@ public: return it->second; } - bool isUInt64(const std::size_t attribute_idx) const override - { - return attributes[attribute_idx].type == attribute_type::uint64; + #define DECLARE_TYPE_CHECKER(NAME, LC_NAME)\ + bool is##NAME(const std::size_t attribute_idx) const override\ + {\ + return attributes[attribute_idx].type == attribute_type::LC_NAME;\ } + DECLARE_TYPE_CHECKER(UInt8, uint8) + DECLARE_TYPE_CHECKER(UInt16, uint16) + DECLARE_TYPE_CHECKER(UInt32, uint32) + DECLARE_TYPE_CHECKER(UInt64, uint64) + DECLARE_TYPE_CHECKER(Int8, int8) + DECLARE_TYPE_CHECKER(Int16, int16) + DECLARE_TYPE_CHECKER(Int32, int32) + DECLARE_TYPE_CHECKER(Int64, int64) + DECLARE_TYPE_CHECKER(Float32, float32) + DECLARE_TYPE_CHECKER(Float64, float64) + DECLARE_TYPE_CHECKER(String, string) + #undef DECLARE_TYPE_CHECKER - bool isString(const std::size_t attribute_idx) const override - { 
- return attributes[attribute_idx].type == attribute_type::string; - } - - UInt64 getUInt64Unsafe(const std::size_t attribute_idx, const id_t id) const override - { - const auto & attribute = attributes[attribute_idx]; - return id < max_array_size ? attribute.uint64_array[id] : attribute.uint64_null_value; - } - - StringRef getStringUnsafe(const std::size_t attribute_idx, const id_t id) const override - { - const auto & attribute = attributes[attribute_idx]; - return id < max_array_size ? attribute.string_array[id] : attribute.string_null_value; + #define DECLARE_UNSAFE_GETTER(TYPE, NAME, LC_NAME)\ + TYPE get##NAME##Unsafe(const std::size_t attribute_idx, const id_t id) const override\ + {\ + const auto & attribute = attributes[attribute_idx];\ + return id < max_array_size ? attribute.LC_NAME##_array[id] : attribute.LC_NAME##_null_value;\ } + DECLARE_UNSAFE_GETTER(UInt8, UInt8, uint8) + DECLARE_UNSAFE_GETTER(UInt16, UInt16, uint16) + DECLARE_UNSAFE_GETTER(UInt32, UInt32, uint32) + DECLARE_UNSAFE_GETTER(UInt64, UInt64, uint64) + DECLARE_UNSAFE_GETTER(Int8, Int8, int8) + DECLARE_UNSAFE_GETTER(Int16, Int16, int16) + DECLARE_UNSAFE_GETTER(Int32, Int32, int32) + DECLARE_UNSAFE_GETTER(Int64, Int64, int64) + DECLARE_UNSAFE_GETTER(Float32, Float32, float32) + DECLARE_UNSAFE_GETTER(Float64, Float64, float64) + DECLARE_UNSAFE_GETTER(StringRef, String, string) + #undef DECLARE_UNSAFE_GETTER bool isComplete() const override { return true; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 02905a0d5ba..f76823568d6 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -26,8 +26,17 @@ public: } /// safe and slow functions, perform map lookup and type checks + virtual UInt8 getUInt8(const std::string & attribute_name, id_t id) const = 0; + virtual UInt16 getUInt16(const std::string & attribute_name, id_t id) const = 0; + virtual UInt32 getUInt32(const std::string & 
attribute_name, id_t id) const = 0; virtual UInt64 getUInt64(const std::string & attribute_name, id_t id) const = 0; - virtual StringRef getString(const std::string & attribute_name, id_t id) const = 0; + virtual Int8 getInt8(const std::string & attribute_name, id_t id) const = 0; + virtual Int16 getInt16(const std::string & attribute_name, id_t id) const = 0; + virtual Int32 getInt32(const std::string & attribute_name, id_t id) const = 0; + virtual Int64 getInt64(const std::string & attribute_name, id_t id) const = 0; + virtual Float32 getFloat32(const std::string & attribute_name, id_t id) const = 0; + virtual Float64 getFloat64(const std::string & attribute_name, id_t id) const = 0; + virtual StringRef getString(const std::string & attribute_name, id_t id) const = 0; /// unsafe functions for maximum performance, you are on your own ensuring type-safety @@ -35,11 +44,29 @@ public: virtual std::size_t getAttributeIndex(const std::string & attribute_name) const = 0; /// type-checking functions + virtual bool isUInt8(std::size_t attribute_idx) const = 0; + virtual bool isUInt16(std::size_t attribute_idx) const = 0; + virtual bool isUInt32(std::size_t attribute_idx) const = 0; virtual bool isUInt64(std::size_t attribute_idx) const = 0; + virtual bool isInt8(std::size_t attribute_idx) const = 0; + virtual bool isInt16(std::size_t attribute_idx) const = 0; + virtual bool isInt32(std::size_t attribute_idx) const = 0; + virtual bool isInt64(std::size_t attribute_idx) const = 0; + virtual bool isFloat32(std::size_t attribute_idx) const = 0; + virtual bool isFloat64(std::size_t attribute_idx) const = 0; virtual bool isString(std::size_t attribute_idx) const = 0; /// plain load from target container without any checks + virtual UInt8 getUInt8Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual UInt16 getUInt16Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual UInt32 getUInt32Unsafe(std::size_t attribute_idx, id_t id) const = 0; virtual UInt64 
getUInt64Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Int8 getInt8Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Int16 getInt16Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Int32 getInt32Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Int64 getInt64Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Float32 getFloat32Unsafe(std::size_t attribute_idx, id_t id) const = 0; + virtual Float64 getFloat64Unsafe(std::size_t attribute_idx, id_t id) const = 0; virtual StringRef getStringUnsafe(std::size_t attribute_idx, id_t id) const = 0; /// entirely-loaded dictionaries should be immutable diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index 1bc62603f3f..4dbc24586a5 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -917,18 +917,46 @@ private: }; -template -class FunctionDictGetInteger final : public IFunction +template struct DictGetTraits; +#define DECLARE_DICT_GET_TRAITS(TYPE, DATA_TYPE) \ +template <> struct DictGetTraits\ +{\ + static TYPE get(const IDictionary * const dict, const std::string & name, const IDictionary::id_t id)\ + {\ + return dict->get##TYPE(name, id);\ + }\ + static bool is(const IDictionary * const dict, const std::size_t idx) { return dict->is##TYPE(idx); } \ + static TYPE get(const IDictionary * const dict, const std::size_t idx, const IDictionary::id_t id)\ + {\ + return dict->get##TYPE##Unsafe(idx, id);\ + }\ +}; +DECLARE_DICT_GET_TRAITS(UInt8, DataTypeUInt8) +DECLARE_DICT_GET_TRAITS(UInt16, DataTypeUInt16) +DECLARE_DICT_GET_TRAITS(UInt32, DataTypeUInt32) +DECLARE_DICT_GET_TRAITS(UInt64, DataTypeUInt64) +DECLARE_DICT_GET_TRAITS(Int8, DataTypeInt8) +DECLARE_DICT_GET_TRAITS(Int16, DataTypeInt16) +DECLARE_DICT_GET_TRAITS(Int32, DataTypeInt32) +DECLARE_DICT_GET_TRAITS(Int64, DataTypeInt64) +DECLARE_DICT_GET_TRAITS(Float32, 
DataTypeFloat32) +DECLARE_DICT_GET_TRAITS(Float64, DataTypeFloat64) +#undef DECLARE_DICT_GET_TRAITS + +template +class FunctionDictGet final : public IFunction { + using Type = typename DataType::FieldType; + public: static const std::string name; static IFunction * create(const Context & context) { - return new FunctionDictGetInteger{context.getDictionaries()}; + return new FunctionDictGet{context.getDictionaries()}; }; - FunctionDictGetInteger(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} + FunctionDictGet(const Dictionaries & dictionaries) : dictionaries(dictionaries) {} String getName() const override { return name; } @@ -974,7 +1002,7 @@ private: }; } - return new typename DataTypeFromFieldType::Type; + return new DataType; } void execute(Block & block, const ColumnNumbers & arguments, const size_t result) override @@ -987,6 +1015,7 @@ private: }; auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + const auto dict_ptr = dict.get(); const auto attr_name_col = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); if (!attr_name_col) @@ -998,14 +1027,14 @@ private: const auto & attr_name = attr_name_col->getData(); const auto id_col = block.getByPosition(arguments[2]).column.get(); - if (!execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col) && - !execute(block, result, dict, attr_name, id_col)) + if (!execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col) && + 
!execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col) && + !execute(block, result, dict_ptr, attr_name, id_col)) { throw Exception{ "Third argument of function " + getName() + " must be integral", @@ -1015,31 +1044,36 @@ private: } template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + bool execute(Block & block, const size_t result, const IDictionary * const dictionary, const std::string & attr_name, const IColumn * const id_col_untyped) { if (const auto id_col = typeid_cast *>(id_col_untyped)) { - const auto out = new ColumnVector; - block.getByPosition(result).column = out; - const auto attribute_idx = dictionary->getAttributeIndex(attr_name); - if (!dictionary->isUInt64(attribute_idx)) + if (!DictGetTraits::is(dictionary, attribute_idx)) throw Exception{ "Type mismatch: attribute " + attr_name + " has type different from UInt64", ErrorCodes::TYPE_MISMATCH }; - for (const auto & id : id_col->getData()) - out->insert(dictionary->getUInt64Unsafe(attribute_idx, id)); + const auto out = new ColumnVector; + block.getByPosition(result).column = out; + + const auto & ids = id_col->getData(); + auto & data = out->getData(); + const auto size = ids.size(); + data.resize(size); + + for (const auto idx : ext::range(0, size)) + data[idx] = DictGetTraits::get(dictionary, attribute_idx, ids[idx]); return true; } else if (const auto id_col = typeid_cast *>(id_col_untyped)) { - block.getByPosition(result).column = new ColumnConst{ + block.getByPosition(result).column = new ColumnConst{ id_col->size(), - static_cast(dictionary->getUInt64(attr_name, id_col->getData())) + DictGetTraits::get(dictionary, attr_name, id_col->getData()) }; return true; @@ -1051,19 +1085,20 @@ private: const Dictionaries & dictionaries; }; -template -const std::string FunctionDictGetInteger::name = "dictGet" + TypeName::get(); +template +const std::string FunctionDictGet::name = "dictGet" + 
TypeName::get(); -using FunctionDictGetUInt8 = FunctionDictGetInteger; -using FunctionDictGetUInt16 = FunctionDictGetInteger; -using FunctionDictGetUInt32 = FunctionDictGetInteger; -using FunctionDictGetUInt64 = FunctionDictGetInteger; -using FunctionDictGetInt8 = FunctionDictGetInteger; -using FunctionDictGetInt16 = FunctionDictGetInteger; -using FunctionDictGetInt32 = FunctionDictGetInteger; -using FunctionDictGetInt64 = FunctionDictGetInteger; - +using FunctionDictGetUInt8 = FunctionDictGet; +using FunctionDictGetUInt16 = FunctionDictGet; +using FunctionDictGetUInt32 = FunctionDictGet; +using FunctionDictGetUInt64 = FunctionDictGet; +using FunctionDictGetInt8 = FunctionDictGet; +using FunctionDictGetInt16 = FunctionDictGet; +using FunctionDictGetInt32 = FunctionDictGet; +using FunctionDictGetInt64 = FunctionDictGet; +using FunctionDictGetFloat32 = FunctionDictGet; +using FunctionDictGetFloat64 = FunctionDictGet; class FunctionDictGetHierarchy final : public IFunction diff --git a/dbms/src/Functions/FunctionsDictionaries.cpp b/dbms/src/Functions/FunctionsDictionaries.cpp index 15536d2c247..ebfc5399f9e 100644 --- a/dbms/src/Functions/FunctionsDictionaries.cpp +++ b/dbms/src/Functions/FunctionsDictionaries.cpp @@ -33,6 +33,8 @@ void registerFunctionsDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); From 070c6be60e0a36920bb91bba89cf92e88354359f Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 28 Jan 2015 19:23:07 +0300 Subject: [PATCH 10/43] dbms: add PODArray::resize_fill(size_t, const T&) --- dbms/include/DB/Common/PODArray.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dbms/include/DB/Common/PODArray.h b/dbms/include/DB/Common/PODArray.h index 5e16e49a87b..d819415ceed 100644 --- a/dbms/include/DB/Common/PODArray.h +++ 
b/dbms/include/DB/Common/PODArray.h @@ -231,6 +231,17 @@ public: c_end = c_start + byte_size(n); } + void resize_fill(size_t n, const T & value) + { + size_t old_size = size(); + if (n > old_size) + { + reserve(n); + std::fill(t_end(), reinterpret_cast(c_end + n - old_size), value); + } + c_end = c_start + byte_size(n); + } + void clear() { c_end = c_start; From 95bb52b8e5ba884df1c60075724bc4ad6dec4191 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 28 Jan 2015 19:23:52 +0300 Subject: [PATCH 11/43] dbms: use PODArray with FlatDictionary [#METR-13298] remove IDictionarySource::reset --- .../DB/Dictionaries/FileDictionarySource.h | 14 +- dbms/include/DB/Dictionaries/FlatDictionary.h | 210 +++++++++++------- .../DB/Dictionaries/IDictionarySource.h | 2 - .../OwningBufferBlockInputStream.h | 32 +++ 4 files changed, 166 insertions(+), 92 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/OwningBufferBlockInputStream.h diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 0502fbab4f1..1b39db51ba3 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -19,9 +20,11 @@ public: private: BlockInputStreamPtr loadAll() override { - in_ptr = ext::make_unique(filename); - return context.getFormatFactory().getInput( + auto in_ptr = ext::make_unique(filename); + auto stream = context.getFormatFactory().getInput( format, *in_ptr, sample_block, max_block_size, context.getDataTypeFactory()); + + return new OwningBufferBlockInputStream{stream, std::move(in_ptr)}; } BlockInputStreamPtr loadId(const std::uint64_t id) override @@ -40,17 +43,10 @@ private: }; } - void reset() override - { - in_ptr.reset(nullptr); - } - const std::string filename; const std::string format; Block sample_block; const Context & context; - - std::unique_ptr in_ptr; }; } diff --git 
a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 8ca411d105d..8852ac0b41f 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -9,10 +9,9 @@ namespace DB { -const auto initial_array_size = 128; +const auto initial_array_size = 1024; const auto max_array_size = 500000; -/// @todo manage arrays using std::vector or PODArray, start with an initial size, expand up to max_array_size class FlatDictionary final : public IDictionary { public: @@ -20,16 +19,17 @@ public: const std::string & config_prefix, DictionarySourcePtr source_ptr) : source_ptr{std::move(source_ptr)} { - attributes.reserve(dict_struct.attributes.size()); - for (const auto & attribute : dict_struct.attributes) + const auto size = dict_struct.attributes.size(); + attributes.resize(size); + for (const auto idx : ext::range(0, size)) { - attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.emplace_back( - createAttributeWithType(getAttributeTypeByName(attribute.type), + const auto & attribute = dict_struct.attributes[idx]; + attribute_index_by_name.emplace(attribute.name, idx); + attributes[idx] = std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), attribute.null_value)); if (attribute.hierarchical) - hierarchical_attribute = &attributes.back(); + hierarchical_attribute = &attributes[idx]; } auto stream = this->source_ptr->loadAll(); @@ -47,26 +47,22 @@ public: setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); } } - - /// @todo wrap source_ptr so that it reset buffer automatically - this->source_ptr->reset(); } id_t toParent(const id_t id) const override { - const auto exists = id < max_array_size; const auto attr = hierarchical_attribute; switch (hierarchical_attribute->type) { - case attribute_type::uint8: return exists ? 
attr->uint8_array[id] : attr->uint8_null_value; - case attribute_type::uint16: return exists ? attr->uint16_array[id] : attr->uint16_null_value; - case attribute_type::uint32: return exists ? attr->uint32_array[id] : attr->uint32_null_value; - case attribute_type::uint64: return exists ? attr->uint64_array[id] : attr->uint64_null_value; - case attribute_type::int8: return exists ? attr->int8_array[id] : attr->int8_null_value; - case attribute_type::int16: return exists ? attr->int16_array[id] : attr->int16_null_value; - case attribute_type::int32: return exists ? attr->int32_array[id] : attr->int32_null_value; - case attribute_type::int64: return exists ? attr->int64_array[id] : attr->int64_null_value; + case attribute_type::uint8: return id < attr->uint8_array->size() ? (*attr->uint8_array)[id] : attr->uint8_null_value; + case attribute_type::uint16: return id < attr->uint16_array->size() ? (*attr->uint16_array)[id] : attr->uint16_null_value; + case attribute_type::uint32: return id < attr->uint32_array->size() ? (*attr->uint32_array)[id] : attr->uint32_null_value; + case attribute_type::uint64: return id < attr->uint64_array->size() ? (*attr->uint64_array)[id] : attr->uint64_null_value; + case attribute_type::int8: return id < attr->int8_array->size() ? (*attr->int8_array)[id] : attr->int8_null_value; + case attribute_type::int16: return id < attr->int16_array->size() ? (*attr->int16_array)[id] : attr->int16_null_value; + case attribute_type::int32: return id < attr->int32_array->size() ? (*attr->int32_array)[id] : attr->int32_null_value; + case attribute_type::int64: return id < attr->int64_array->size() ? 
(*attr->int64_array)[id] : attr->int64_null_value; case attribute_type::float32: case attribute_type::float64: case attribute_type::string: @@ -89,8 +85,8 @@ public: "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ - if (id < max_array_size)\ - return attribute.LC_TYPE##_array[id];\ + if (id < attribute.LC_TYPE##_array->size())\ + return (*attribute.LC_TYPE##_array)[id];\ return attribute.LC_TYPE##_null_value;\ } DECLARE_SAFE_GETTER(UInt8, UInt8, uint8) @@ -140,7 +136,7 @@ public: TYPE get##NAME##Unsafe(const std::size_t attribute_idx, const id_t id) const override\ {\ const auto & attribute = attributes[attribute_idx];\ - return id < max_array_size ? attribute.LC_NAME##_array[id] : attribute.LC_NAME##_null_value;\ + return id < attribute.LC_NAME##_array->size() ? (*attribute.LC_NAME##_array)[id] : attribute.LC_NAME##_null_value;\ } DECLARE_UNSAFE_GETTER(UInt8, UInt8, uint8) DECLARE_UNSAFE_GETTER(UInt16, UInt16, uint16) @@ -173,18 +169,18 @@ public: Float32 float32_null_value; Float64 float64_null_value; String string_null_value; - std::unique_ptr uint8_array; - std::unique_ptr uint16_array; - std::unique_ptr uint32_array; - std::unique_ptr uint64_array; - std::unique_ptr int8_array; - std::unique_ptr int16_array; - std::unique_ptr int32_array; - std::unique_ptr int64_array; - std::unique_ptr float32_array; - std::unique_ptr float64_array; + std::unique_ptr> uint8_array; + std::unique_ptr> uint16_array; + std::unique_ptr> uint32_array; + std::unique_ptr> uint64_array; + std::unique_ptr> int8_array; + std::unique_ptr> int16_array; + std::unique_ptr> int32_array; + std::unique_ptr> int64_array; + std::unique_ptr> float32_array; + std::unique_ptr> float64_array; std::unique_ptr string_arena; - std::vector string_array; + std::unique_ptr> string_array; }; attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) @@ -195,60 +191,59 @@ public: { case 
attribute_type::uint8: attr.uint8_null_value = DB::parse(null_value); - attr.uint8_array.reset(new UInt8[max_array_size]); - std::fill(attr.uint8_array.get(), attr.uint8_array.get() + max_array_size, attr.uint8_null_value); + attr.uint8_array.reset(new PODArray); + attr.uint8_array->resize_fill(initial_array_size, attr.uint8_null_value); break; case attribute_type::uint16: attr.uint16_null_value = DB::parse(null_value); - attr.uint16_array.reset(new UInt16[max_array_size]); - std::fill(attr.uint16_array.get(), attr.uint16_array.get() + max_array_size, attr.uint16_null_value); + attr.uint16_array.reset(new PODArray); + attr.uint16_array->resize_fill(initial_array_size, attr.uint16_null_value); break; case attribute_type::uint32: attr.uint32_null_value = DB::parse(null_value); - attr.uint32_array.reset(new UInt32[max_array_size]); - std::fill(attr.uint32_array.get(), attr.uint32_array.get() + max_array_size, attr.uint32_null_value); + attr.uint32_array.reset(new PODArray); + attr.uint32_array->resize_fill(initial_array_size, attr.uint32_null_value); break; case attribute_type::uint64: attr.uint64_null_value = DB::parse(null_value); - attr.uint64_array.reset(new UInt64[max_array_size]); - std::fill(attr.uint64_array.get(), attr.uint64_array.get() + max_array_size, attr.uint64_null_value); + attr.uint64_array.reset(new PODArray); + attr.uint64_array->resize_fill(initial_array_size, attr.uint64_null_value); break; case attribute_type::int8: attr.int8_null_value = DB::parse(null_value); - attr.int8_array.reset(new Int8[max_array_size]); - std::fill(attr.int8_array.get(), attr.int8_array.get() + max_array_size, attr.int8_null_value); + attr.int8_array.reset(new PODArray); + attr.int8_array->resize_fill(initial_array_size, attr.int8_null_value); break; case attribute_type::int16: attr.int16_null_value = DB::parse(null_value); - attr.int16_array.reset(new Int16[max_array_size]); - std::fill(attr.int16_array.get(), attr.int16_array.get() + max_array_size, 
attr.int16_null_value); + attr.int16_array.reset(new PODArray); + attr.int16_array->resize_fill(initial_array_size, attr.int16_null_value); break; case attribute_type::int32: attr.int32_null_value = DB::parse(null_value); - attr.int32_array.reset(new Int32[max_array_size]); - std::fill(attr.int32_array.get(), attr.int32_array.get() + max_array_size, attr.int32_null_value); + attr.int32_array.reset(new PODArray); + attr.int32_array->resize_fill(initial_array_size, attr.int32_null_value); break; case attribute_type::int64: attr.int64_null_value = DB::parse(null_value); - attr.int64_array.reset(new Int64[max_array_size]); - std::fill(attr.int64_array.get(), attr.int64_array.get() + max_array_size, attr.int64_null_value); + attr.int64_array.reset(new PODArray); + attr.int64_array->resize_fill(initial_array_size, attr.int64_null_value); break; case attribute_type::float32: attr.float32_null_value = DB::parse(null_value); - attr.float32_array.reset(new Float32[max_array_size]); - std::fill(attr.float32_array.get(), attr.float32_array.get() + max_array_size, attr.float32_null_value); + attr.float32_array.reset(new PODArray); + attr.float32_array->resize_fill(initial_array_size, attr.float32_null_value); break; case attribute_type::float64: attr.float64_null_value = DB::parse(null_value); - attr.float64_array.reset(new Float64[max_array_size]); - std::fill(attr.float64_array.get(), attr.float64_array.get() + max_array_size, attr.float64_null_value); + attr.float64_array.reset(new PODArray); + attr.float64_array->resize_fill(initial_array_size, attr.float64_null_value); break; case attribute_type::string: attr.string_null_value = null_value; attr.string_arena.reset(new Arena); - attr.string_array.resize(initial_array_size, StringRef{ - attr.string_null_value.data(), attr.string_null_value.size() - }); + attr.string_array.reset(new PODArray); + attr.string_array->resize_fill(initial_array_size, attr.string_null_value); break; } @@ -265,33 +260,86 @@ public: switch 
(attribute.type) { - case attribute_type::uint8: attribute.uint8_array[id] = value.get(); break; - case attribute_type::uint16: attribute.uint16_array[id] = value.get(); break; - case attribute_type::uint32: attribute.uint32_array[id] = value.get(); break; - case attribute_type::uint64: attribute.uint64_array[id] = value.get(); break; - case attribute_type::int8: attribute.int8_array[id] = value.get(); break; - case attribute_type::int16: attribute.int16_array[id] = value.get(); break; - case attribute_type::int32: attribute.int32_array[id] = value.get(); break; - case attribute_type::int64: attribute.int64_array[id] = value.get(); break; - case attribute_type::float32: attribute.float32_array[id] = value.get(); break; - case attribute_type::float64: attribute.float64_array[id] = value.get(); break; - case attribute_type::string: + case attribute_type::uint8: { - const auto & string = value.get(); - const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - - const auto current_size = attribute.string_array.size(); - if (id >= current_size) - attribute.string_array.resize( - std::min(max_array_size, 2 * current_size > id ? 
2 * current_size : 2 * id), - StringRef{ - attribute.string_null_value.data(), attribute.string_null_value.size() - }); - - attribute.string_array[id] = StringRef{string_in_arena, string.size()}; + if (id >= attribute.uint8_array->size()) + attribute.uint8_array->resize_fill(id, attribute.uint8_null_value); + (*attribute.uint8_array)[id] = value.get(); break; } - } + case attribute_type::uint16: + { + if (id >= attribute.uint16_array->size()) + attribute.uint16_array->resize_fill(id, attribute.uint16_null_value); + (*attribute.uint16_array)[id] = value.get(); + break; + } + case attribute_type::uint32: + { + if (id >= attribute.uint32_array->size()) + attribute.uint32_array->resize_fill(id, attribute.uint32_null_value); + (*attribute.uint32_array)[id] = value.get(); + break; + } + case attribute_type::uint64: + { + if (id >= attribute.uint64_array->size()) + attribute.uint64_array->resize_fill(id, attribute.uint64_null_value); + (*attribute.uint64_array)[id] = value.get(); + break; + } + case attribute_type::int8: + { + if (id >= attribute.int8_array->size()) + attribute.int8_array->resize_fill(id, attribute.int8_null_value); + (*attribute.int8_array)[id] = value.get(); + break; + } + case attribute_type::int16: + { + if (id >= attribute.int16_array->size()) + attribute.int16_array->resize_fill(id, attribute.int16_null_value); + (*attribute.int16_array)[id] = value.get(); + break; + } + case attribute_type::int32: + { + if (id >= attribute.int32_array->size()) + attribute.int32_array->resize_fill(id, attribute.int32_null_value); + (*attribute.int32_array)[id] = value.get(); + break; + } + case attribute_type::int64: + { + if (id >= attribute.int64_array->size()) + attribute.int64_array->resize_fill(id, attribute.int64_null_value); + (*attribute.int64_array)[id] = value.get(); + break; + } + case attribute_type::float32: + { + if (id >= attribute.float32_array->size()) + attribute.float32_array->resize_fill(id, attribute.float32_null_value); + 
(*attribute.float32_array)[id] = value.get(); + break; + } + case attribute_type::float64: + { + if (id >= attribute.float64_array->size()) + attribute.float64_array->resize_fill(id, attribute.float64_null_value); + (*attribute.float64_array)[id] = value.get(); + break; + } + case attribute_type::string: + { + if (id >= attribute.string_array->size()) + attribute.string_array->resize_fill(id, attribute.string_null_value); + const auto & string = value.get(); + const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); + (*attribute.string_array)[id] = StringRef{string_in_arena, string.size()}; + break; + } + }; } std::map attribute_index_by_name; diff --git a/dbms/include/DB/Dictionaries/IDictionarySource.h b/dbms/include/DB/Dictionaries/IDictionarySource.h index 6efc8619754..526047f6fb9 100644 --- a/dbms/include/DB/Dictionaries/IDictionarySource.h +++ b/dbms/include/DB/Dictionaries/IDictionarySource.h @@ -13,8 +13,6 @@ public: virtual BlockInputStreamPtr loadId(const std::uint64_t id) = 0; virtual BlockInputStreamPtr loadIds(const std::vector ids) = 0; - virtual void reset() {} - virtual ~IDictionarySource() = default; }; diff --git a/dbms/include/DB/Dictionaries/OwningBufferBlockInputStream.h b/dbms/include/DB/Dictionaries/OwningBufferBlockInputStream.h new file mode 100644 index 00000000000..b50d4c322a7 --- /dev/null +++ b/dbms/include/DB/Dictionaries/OwningBufferBlockInputStream.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class OwningBufferBlockInputStream : public IProfilingBlockInputStream +{ +public: + OwningBufferBlockInputStream(const BlockInputStreamPtr & stream, std::unique_ptr buffer) + : stream{stream}, buffer{std::move(buffer)} + { + children.push_back(stream); + } + +private: + Block readImpl() override { return stream->read(); } + + String getName() const override { return "OwningBufferBlockInputStream"; } + + String getID() const override { + return "OwningBuffer(" + 
stream->getID() + ")"; + } + + BlockInputStreamPtr stream; + std::unique_ptr buffer; +}; + +} From 7addd501febbee4a1510494f47e1bc63938e61a5 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 29 Jan 2015 14:51:52 +0300 Subject: [PATCH 12/43] dbms: allow use of clickhouse as a dictionary source [#METR-13298] --- .../Dictionaries/ClickhouseDictionarySource.h | 109 ++++++++++++++++++ .../DB/Dictionaries/DictionarySourceFactory.h | 6 + .../DB/Dictionaries/MysqlDictionarySource.h | 2 - 3 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h new file mode 100644 index 00000000000..fdb360edc47 --- /dev/null +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ClickhouseDictionarySource final : public IDictionarySource +{ + static const auto max_block_size = 8192; + static const auto max_connections = 1; + +public: + ClickhouseDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + Block & sample_block, const Context & context) + : host{config.getString(config_prefix + "host")}, + port(config.getInt(config_prefix + "port")), + is_local{isLocal(host, port)}, + pool{is_local ? 
nullptr : ext::make_unique( + max_connections, host, port, + config.getString(config_prefix + "db", ""), + config.getString(config_prefix + "user", ""), + config.getString(config_prefix + "password", ""), + context.getDataTypeFactory(), + "ClickhouseDictionarySource") + }, + sample_block{sample_block}, context(context), + table{config.getString(config_prefix + "table")}, + load_all_query{composeLoadAllQuery(sample_block, table)} + {} + +private: + BlockInputStreamPtr loadAll() override + { + if (is_local) + return executeQuery(load_all_query, context).in; + return new RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; + } + + BlockInputStreamPtr loadId(const std::uint64_t id) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + BlockInputStreamPtr loadIds(const std::vector ids) override + { + throw Exception{ + "Method unsupported", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + static std::string composeLoadAllQuery(const Block & block, const std::string & table) + { + std::string query{"SELECT "}; + + auto first = true; + for (const auto idx : ext::range(0, block.columns())) + { + if (!first) + query += ", "; + + query += block.getByPosition(idx).name; + first = false; + } + + query += " FROM " + table + ';'; + + return query; + } + + static bool isLocal(const std::string & host, const UInt16 port) + { + const UInt16 clickhouse_port = Poco::Util::Application::instance().config().getInt("tcp_port", 0); + static auto interfaces = Poco::Net::NetworkInterface::list(); + + if (clickhouse_port == port) + { + return interfaces.end() != std::find_if(interfaces.begin(), interfaces.end(), + [&] (const Poco::Net::NetworkInterface & interface) { + return interface.address() == Poco::Net::IPAddress(host); + }); + } + + return false; + } + + const std::string host; + const UInt16 port; + const bool is_local; + std::unique_ptr pool; + Block sample_block; + Context context; + const std::string table; + const std::string 
load_all_query; +}; + +} diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index 63ccadcc005..aa322bcc20e 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,11 @@ public: { return ext::make_unique(config, config_prefix + "mysql.", sample_block, context); } + else if (config.has(config_prefix + "clickhouse")) + { + return ext::make_unique(config, config_prefix + "clickhouse.", + sample_block, context); + } throw Exception{"unsupported source type"}; } diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index 2a340ccc6c1..c55b552674a 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -2,12 +2,10 @@ #include #include -#include #include #include #include #include -#include #include namespace DB From 2482560b15c85a5e99cc9f8d8beb4ef2649c8977 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Thu, 29 Jan 2015 15:53:59 +0300 Subject: [PATCH 13/43] DB::Connection: don't log Ping requests --- dbms/src/Client/Connection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 3f14786085d..339a18ea249 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -165,7 +165,7 @@ void Connection::forceConnected() bool Connection::ping() { - LOG_TRACE(log_wrapper.get(), "Ping (" << getServerAddress() << ")"); + // LOG_TRACE(log_wrapper.get(), "Ping (" << getServerAddress() << ")"); try { From 4bbdb2320e9fbb652d160fd88205ada1fa301fb7 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 29 Jan 2015 16:53:48 +0300 Subject: [PATCH 14/43] dbms: add HashedDictionary [#METR-13298] --- 
.../DB/Dictionaries/DictionaryFactory.h | 6 +- dbms/include/DB/Dictionaries/FlatDictionary.h | 38 +- .../DB/Dictionaries/HashedDictionary.h | 347 ++++++++++++++++++ 3 files changed, 369 insertions(+), 22 deletions(-) create mode 100644 dbms/include/DB/Dictionaries/HashedDictionary.h diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 6a5836ab320..6d3410119d3 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -29,10 +30,7 @@ public: } else if (config.has(layout_prefix + "hashed")) { - throw Exception{ - "Dictionary of type 'hashed' is not yet implemented", - ErrorCodes::NOT_IMPLEMENTED - }; + return ext::make_unique(dict_struct, config, config_prefix, std::move(source_ptr)); } else if (config.has(layout_prefix + "cache")) { diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 8852ac0b41f..c15712f4e2c 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -1,7 +1,8 @@ #pragma once -#include #include +#include +#include #include #include #include @@ -17,19 +18,18 @@ class FlatDictionary final : public IDictionary public: FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr) - : source_ptr{std::move(source_ptr)} + : source_ptr{std::move(source_ptr)} { const auto size = dict_struct.attributes.size(); - attributes.resize(size); - for (const auto idx : ext::range(0, size)) + attributes.reserve(size); + for (const auto & attribute : dict_struct.attributes) { - const auto & attribute = dict_struct.attributes[idx]; - attribute_index_by_name.emplace(attribute.name, idx); - attributes[idx] = 
std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value)); + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value))); if (attribute.hierarchical) - hierarchical_attribute = &attributes[idx]; + hierarchical_attribute = &attributes.back(); } auto stream = this->source_ptr->loadAll(); @@ -49,6 +49,8 @@ public: } } + bool hasHierarchy() const override { return hierarchical_attribute; } + id_t toParent(const id_t id) const override { const auto attr = hierarchical_attribute; @@ -75,7 +77,7 @@ public: }; } - #define DECLARE_SAFE_GETTER(TYPE, NAME, LC_TYPE) \ +#define DECLARE_SAFE_GETTER(TYPE, NAME, LC_TYPE) \ TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ {\ const auto idx = getAttributeIndex(attribute_name);\ @@ -100,7 +102,7 @@ public: DECLARE_SAFE_GETTER(Float32, Float32, float32) DECLARE_SAFE_GETTER(Float64, Float64, float64) DECLARE_SAFE_GETTER(StringRef, String, string) - #undef DECLARE_SAFE_GETTER +#undef DECLARE_SAFE_GETTER std::size_t getAttributeIndex(const std::string & attribute_name) const override { @@ -114,7 +116,7 @@ public: return it->second; } - #define DECLARE_TYPE_CHECKER(NAME, LC_NAME)\ +#define DECLARE_TYPE_CHECKER(NAME, LC_NAME)\ bool is##NAME(const std::size_t attribute_idx) const override\ {\ return attributes[attribute_idx].type == attribute_type::LC_NAME;\ @@ -130,13 +132,15 @@ public: DECLARE_TYPE_CHECKER(Float32, float32) DECLARE_TYPE_CHECKER(Float64, float64) DECLARE_TYPE_CHECKER(String, string) - #undef DECLARE_TYPE_CHECKER +#undef DECLARE_TYPE_CHECKER - #define DECLARE_UNSAFE_GETTER(TYPE, NAME, LC_NAME)\ +#define DECLARE_UNSAFE_GETTER(TYPE, NAME, LC_NAME)\ TYPE get##NAME##Unsafe(const std::size_t attribute_idx, const id_t id) const override\ {\ const auto & attribute = attributes[attribute_idx];\ - return id < 
attribute.LC_NAME##_array->size() ? (*attribute.LC_NAME##_array)[id] : attribute.LC_NAME##_null_value;\ + if (id < attribute.LC_NAME##_array->size())\ + return (*attribute.LC_NAME##_array)[id];\ + return attribute.LC_NAME##_null_value;\ } DECLARE_UNSAFE_GETTER(UInt8, UInt8, uint8) DECLARE_UNSAFE_GETTER(UInt16, UInt16, uint16) @@ -149,12 +153,10 @@ public: DECLARE_UNSAFE_GETTER(Float32, Float32, float32) DECLARE_UNSAFE_GETTER(Float64, Float64, float64) DECLARE_UNSAFE_GETTER(StringRef, String, string) - #undef DECLARE_UNSAFE_GETTER +#undef DECLARE_UNSAFE_GETTER bool isComplete() const override { return true; } - bool hasHierarchy() const override { return hierarchical_attribute; } - struct attribute_t { attribute_type type; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h new file mode 100644 index 00000000000..8609ac148b1 --- /dev/null +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -0,0 +1,347 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HashedDictionary final : public IDictionary +{ +public: + HashedDictionary(const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, DictionarySourcePtr source_ptr) + : source_ptr{std::move(source_ptr)} + { + const auto size = dict_struct.attributes.size(); + attributes.reserve(size); + for (const auto & attribute : dict_struct.attributes) + { + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value))); + + if (attribute.hierarchical) + hierarchical_attribute = &attributes.back(); + } + + auto stream = this->source_ptr->loadAll(); + + while (const auto block = stream->read()) + { + const auto & id_column = *block.getByPosition(0).column; + + for (const auto attribute_idx : ext::range(0, 
attributes.size())) + { + const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + auto & attribute = attributes[attribute_idx]; + + for (const auto row_idx : ext::range(0, id_column.size())) + setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); + } + } + } + + bool hasHierarchy() const override { return hierarchical_attribute; } + + id_t toParent(const id_t id) const override + { + const auto attr = hierarchical_attribute; + + switch (hierarchical_attribute->type) + { + case attribute_type::uint8: + { + const auto it = attr->uint8_map->find(id); + return it != attr->uint8_map->end() ? it->second : attr->uint8_null_value; + } + case attribute_type::uint16: + { + const auto it = attr->uint16_map->find(id); + return it != attr->uint16_map->end() ? it->second : attr->uint16_null_value; + } + case attribute_type::uint32: + { + const auto it = attr->uint32_map->find(id); + return it != attr->uint32_map->end() ? it->second : attr->uint32_null_value; + } + case attribute_type::uint64: + { + const auto it = attr->uint64_map->find(id); + return it != attr->uint64_map->end() ? it->second : attr->uint64_null_value; + } + case attribute_type::int8: + { + const auto it = attr->int8_map->find(id); + return it != attr->int8_map->end() ? it->second : attr->int8_null_value; + } + case attribute_type::int16: + { + const auto it = attr->int16_map->find(id); + return it != attr->int16_map->end() ? it->second : attr->int16_null_value; + } + case attribute_type::int32: + { + const auto it = attr->int32_map->find(id); + return it != attr->int32_map->end() ? it->second : attr->int32_null_value; + } + case attribute_type::int64: + { + const auto it = attr->int64_map->find(id); + return it != attr->int64_map->end() ? 
it->second : attr->int64_null_value; + } + case attribute_type::float32: + case attribute_type::float64: + case attribute_type::string: + break; + }; + + throw Exception{ + "Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), + ErrorCodes::TYPE_MISMATCH + }; + } + +#define DECLARE_SAFE_GETTER(TYPE, NAME, LC_TYPE) \ + TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ + {\ + const auto idx = getAttributeIndex(attribute_name);\ + const auto & attribute = attributes[idx];\ + if (attribute.type != attribute_type::LC_TYPE)\ + throw Exception{\ + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ + ErrorCodes::TYPE_MISMATCH\ + };\ + const auto it = attribute.LC_TYPE##_map->find(id);\ + if (it != attribute.LC_TYPE##_map->end())\ + return it->second;\ + return attribute.LC_TYPE##_null_value;\ + } + DECLARE_SAFE_GETTER(UInt8, UInt8, uint8) + DECLARE_SAFE_GETTER(UInt16, UInt16, uint16) + DECLARE_SAFE_GETTER(UInt32, UInt32, uint32) + DECLARE_SAFE_GETTER(UInt64, UInt64, uint64) + DECLARE_SAFE_GETTER(Int8, Int8, int8) + DECLARE_SAFE_GETTER(Int16, Int16, int16) + DECLARE_SAFE_GETTER(Int32, Int32, int32) + DECLARE_SAFE_GETTER(Int64, Int64, int64) + DECLARE_SAFE_GETTER(Float32, Float32, float32) + DECLARE_SAFE_GETTER(Float64, Float64, float64) + DECLARE_SAFE_GETTER(StringRef, String, string) +#undef DECLARE_SAFE_GETTER + + std::size_t getAttributeIndex(const std::string & attribute_name) const override + { + const auto it = attribute_index_by_name.find(attribute_name); + if (it == std::end(attribute_index_by_name)) + throw Exception{ + "No such attribute '" + attribute_name + "'", + ErrorCodes::BAD_ARGUMENTS + }; + + return it->second; + } + +#define DECLARE_TYPE_CHECKER(NAME, LC_NAME)\ + bool is##NAME(const std::size_t attribute_idx) const override\ + {\ + return attributes[attribute_idx].type == attribute_type::LC_NAME;\ + } + DECLARE_TYPE_CHECKER(UInt8, uint8) + 
DECLARE_TYPE_CHECKER(UInt16, uint16) + DECLARE_TYPE_CHECKER(UInt32, uint32) + DECLARE_TYPE_CHECKER(UInt64, uint64) + DECLARE_TYPE_CHECKER(Int8, int8) + DECLARE_TYPE_CHECKER(Int16, int16) + DECLARE_TYPE_CHECKER(Int32, int32) + DECLARE_TYPE_CHECKER(Int64, int64) + DECLARE_TYPE_CHECKER(Float32, float32) + DECLARE_TYPE_CHECKER(Float64, float64) + DECLARE_TYPE_CHECKER(String, string) +#undef DECLARE_TYPE_CHECKER + +#define DECLARE_UNSAFE_GETTER(TYPE, NAME, LC_NAME)\ + TYPE get##NAME##Unsafe(const std::size_t attribute_idx, const id_t id) const override\ + {\ + const auto & attribute = attributes[attribute_idx];\ + const auto it = attribute.LC_NAME##_map->find(id);\ + if (it != attribute.LC_NAME##_map->end())\ + return it->second;\ + return attribute.LC_NAME##_null_value;\ + } + DECLARE_UNSAFE_GETTER(UInt8, UInt8, uint8) + DECLARE_UNSAFE_GETTER(UInt16, UInt16, uint16) + DECLARE_UNSAFE_GETTER(UInt32, UInt32, uint32) + DECLARE_UNSAFE_GETTER(UInt64, UInt64, uint64) + DECLARE_UNSAFE_GETTER(Int8, Int8, int8) + DECLARE_UNSAFE_GETTER(Int16, Int16, int16) + DECLARE_UNSAFE_GETTER(Int32, Int32, int32) + DECLARE_UNSAFE_GETTER(Int64, Int64, int64) + DECLARE_UNSAFE_GETTER(Float32, Float32, float32) + DECLARE_UNSAFE_GETTER(Float64, Float64, float64) + DECLARE_UNSAFE_GETTER(StringRef, String, string) +#undef DECLARE_UNSAFE_GETTER + + bool isComplete() const override { return true; } + + struct attribute_t + { + attribute_type type; + UInt8 uint8_null_value; + UInt16 uint16_null_value; + UInt32 uint32_null_value; + UInt64 uint64_null_value; + Int8 int8_null_value; + Int16 int16_null_value; + Int32 int32_null_value; + Int64 int64_null_value; + Float32 float32_null_value; + Float64 float64_null_value; + String string_null_value; + std::unique_ptr> uint8_map; + std::unique_ptr> uint16_map; + std::unique_ptr> uint32_map; + std::unique_ptr> uint64_map; + std::unique_ptr> int8_map; + std::unique_ptr> int16_map; + std::unique_ptr> int32_map; + std::unique_ptr> int64_map; + std::unique_ptr> 
float32_map; + std::unique_ptr> float64_map; + std::unique_ptr string_arena; + std::unique_ptr> string_map; + }; + + attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) + { + attribute_t attr{type}; + + switch (type) + { + case attribute_type::uint8: + attr.uint8_null_value = DB::parse(null_value); + attr.uint8_map.reset(new HashMap); + break; + case attribute_type::uint16: + attr.uint16_null_value = DB::parse(null_value); + attr.uint16_map.reset(new HashMap); + break; + case attribute_type::uint32: + attr.uint32_null_value = DB::parse(null_value); + attr.uint32_map.reset(new HashMap); + break; + case attribute_type::uint64: + attr.uint64_null_value = DB::parse(null_value); + attr.uint64_map.reset(new HashMap); + break; + case attribute_type::int8: + attr.int8_null_value = DB::parse(null_value); + attr.int8_map.reset(new HashMap); + break; + case attribute_type::int16: + attr.int16_null_value = DB::parse(null_value); + attr.int16_map.reset(new HashMap); + break; + case attribute_type::int32: + attr.int32_null_value = DB::parse(null_value); + attr.int32_map.reset(new HashMap); + break; + case attribute_type::int64: + attr.int64_null_value = DB::parse(null_value); + attr.int64_map.reset(new HashMap); + break; + case attribute_type::float32: + attr.float32_null_value = DB::parse(null_value); + attr.float32_map.reset(new HashMap); + break; + case attribute_type::float64: + attr.float64_null_value = DB::parse(null_value); + attr.float64_map.reset(new HashMap); + break; + case attribute_type::string: + attr.string_null_value = null_value; + attr.string_arena.reset(new Arena); + attr.string_map.reset(new HashMap); + break; + } + + return attr; + } + + void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value) + { + switch (attribute.type) + { + case attribute_type::uint8: + { + attribute.uint8_map->insert({ id, value.get() }); + break; + } + case attribute_type::uint16: + { + 
attribute.uint16_map->insert({ id, value.get() }); + break; + } + case attribute_type::uint32: + { + attribute.uint32_map->insert({ id, value.get() }); + break; + } + case attribute_type::uint64: + { + attribute.uint64_map->insert({ id, value.get() }); + break; + } + case attribute_type::int8: + { + attribute.int8_map->insert({ id, value.get() }); + break; + } + case attribute_type::int16: + { + attribute.int16_map->insert({ id, value.get() }); + break; + } + case attribute_type::int32: + { + attribute.int32_map->insert({ id, value.get() }); + break; + } + case attribute_type::int64: + { + attribute.int64_map->insert({ id, value.get() }); + break; + } + case attribute_type::float32: + { + attribute.float32_map->insert({ id, value.get() }); + break; + } + case attribute_type::float64: + { + attribute.float64_map->insert({ id, value.get() }); + break; + } + case attribute_type::string: + { + const auto & string = value.get(); + const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); + attribute.string_map->insert({ id, StringRef{string_in_arena, string.size()} }); + break; + } + }; + } + + std::map attribute_index_by_name; + std::vector attributes; + const attribute_t * hierarchical_attribute = nullptr; + + DictionarySourcePtr source_ptr; +}; + +} From a66af0668efab6bceaad899821da69a2c12db40b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 29 Jan 2015 17:46:15 +0300 Subject: [PATCH 15/43] dbms: devirtualize dictionary access [#METR-13298] --- .../DB/Dictionaries/DictionaryFactory.h | 2 +- .../DB/Dictionaries/DictionaryStructure.h | 4 +- dbms/include/DB/Dictionaries/FlatDictionary.h | 3 + .../DB/Dictionaries/HashedDictionary.h | 3 + dbms/include/DB/Dictionaries/IDictionary.h | 2 + .../DB/Functions/FunctionsDictionaries.h | 121 ++++++++++++++---- 6 files changed, 110 insertions(+), 25 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 
6d3410119d3..bb09f534e59 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -1,9 +1,9 @@ #pragma once #include -#include #include #include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h index 47e445f5e31..27c3e7721bc 100644 --- a/dbms/include/DB/Dictionaries/DictionaryStructure.h +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -24,7 +24,7 @@ enum class attribute_type string }; -attribute_type getAttributeTypeByName(const std::string & type) +inline attribute_type getAttributeTypeByName(const std::string & type) { static const std::unordered_map dictionary{ { "UInt8", attribute_type::uint8 }, @@ -50,7 +50,7 @@ attribute_type getAttributeTypeByName(const std::string & type) }; } -std::string toString(const attribute_type type) +inline std::string toString(const attribute_type type) { switch (type) { diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index c15712f4e2c..05f3b4b6f92 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,8 @@ public: } } + std::string getTypeName() const override { return "FlatDictionary"; } + bool hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 8609ac148b1..a7e8f069409 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,8 @@ public: } } + std::string getTypeName() const override { return "HashedDictionary"; } + bool 
hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index f76823568d6..65d424d3ba1 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -12,6 +12,8 @@ class IDictionary public: using id_t = std::uint64_t; + virtual std::string getTypeName() const = 0; + virtual bool hasHierarchy() const = 0; /// do not call unless you ensure that hasHierarchy() returns true diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index 4dbc24586a5..ff6f690989b 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include namespace DB @@ -849,6 +851,23 @@ private: }; auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + const auto dict_ptr = dict.get(); + + if (!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr)) + throw Exception{ + "Unsupported dictionary type " + dict_ptr->getTypeName(), + ErrorCodes::UNKNOWN_TYPE + }; + } + + template + bool executeDispatch(Block & block, const ColumnNumbers & arguments, const size_t result, + const IDictionary * const dictionary) + { + const auto dict = typeid_cast(dictionary); + if (!dict) + return false; const auto attr_name_col = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); if (!attr_name_col) @@ -874,10 +893,12 @@ private: ErrorCodes::ILLEGAL_COLUMN }; } + + return true; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * const dictionary, const std::string & attr_name, const IColumn * const id_col_untyped) { if (const 
auto id_col = typeid_cast *>(id_col_untyped)) @@ -1017,6 +1038,22 @@ private: auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); const auto dict_ptr = dict.get(); + if (!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr)) + throw Exception{ + "Unsupported dictionary type " + dict_ptr->getTypeName(), + ErrorCodes::UNKNOWN_TYPE + }; + } + + template + bool executeDispatch(Block & block, const ColumnNumbers & arguments, const size_t result, + const IDictionary * const dictionary) + { + const auto dict = typeid_cast(dictionary); + if (!dict) + return false; + const auto attr_name_col = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); if (!attr_name_col) throw Exception{ @@ -1027,24 +1064,26 @@ private: const auto & attr_name = attr_name_col->getData(); const auto id_col = block.getByPosition(arguments[2]).column.get(); - if (!execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col) && - !execute(block, result, dict_ptr, attr_name, id_col)) + if (!execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col) && + !execute(block, result, dict, attr_name, id_col)) { throw Exception{ "Third argument of function " + getName() + " must be integral", ErrorCodes::ILLEGAL_COLUMN }; } + + return true; } - template - bool execute(Block & block, 
const size_t result, const IDictionary * const dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * const dictionary, const std::string & attr_name, const IColumn * const id_col_untyped) { if (const auto id_col = typeid_cast *>(id_col_untyped)) @@ -1162,6 +1201,7 @@ private: }; auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + const auto dict_ptr = dict.get(); if (!dict->hasHierarchy()) throw Exception{ @@ -1169,6 +1209,22 @@ private: ErrorCodes::UNSUPPORTED_METHOD }; + if (!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr)) + throw Exception{ + "Unsupported dictionary type " + dict_ptr->getTypeName(), + ErrorCodes::UNKNOWN_TYPE + }; + } + + template + bool executeDispatch(Block & block, const ColumnNumbers & arguments, const size_t result, + const IDictionary * const dictionary) + { + const auto dict = typeid_cast(dictionary); + if (!dict) + return false; + const auto id_col = block.getByPosition(arguments[1]).column.get(); if (!execute(block, result, dict, id_col) && !execute(block, result, dict, id_col) && @@ -1184,10 +1240,12 @@ private: ErrorCodes::ILLEGAL_COLUMN }; } + + return true; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * const dictionary, const IColumn * const id_col_untyped) { if (const auto id_col = typeid_cast *>(id_col_untyped)) @@ -1320,6 +1378,7 @@ private: }; auto dict = dictionaries.getExternalDictionary(dict_name_col->getData()); + const auto dict_ptr = dict.get(); if (!dict->hasHierarchy()) throw Exception{ @@ -1327,6 +1386,22 @@ private: ErrorCodes::UNSUPPORTED_METHOD }; + if (!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr)) + throw Exception{ + "Unsupported dictionary type " + dict_ptr->getTypeName(), + 
ErrorCodes::UNKNOWN_TYPE + }; + } + + template + bool executeDispatch(Block & block, const ColumnNumbers & arguments, const size_t result, + const IDictionary * const dictionary) + { + const auto dict = typeid_cast(dictionary); + if (!dict) + return false; + const auto child_id_col = block.getByPosition(arguments[1]).column.get(); const auto ancestor_id_col = block.getByPosition(arguments[2]).column.get(); if (!execute(block, result, dict, child_id_col, ancestor_id_col) && @@ -1344,10 +1419,12 @@ private: ErrorCodes::ILLEGAL_COLUMN }; } + + return true; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * const dictionary, const IColumn * const child_id_col_untyped, const IColumn * const ancestor_id_col_untyped) { if (execute>(block, result, dictionary, child_id_col_untyped, ancestor_id_col_untyped) || @@ -1357,8 +1434,8 @@ private: return false; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * dictionary, const IColumn * const child_id_col_untyped, const IColumn * const ancestor_id_col_untyped) { if (const auto child_id_col = typeid_cast(child_id_col_untyped)) @@ -1383,8 +1460,8 @@ private: return false; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const DictionaryType * const dictionary, const ColumnVector * const child_id_col, const IColumn * const ancestor_id_col_untyped) { if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) @@ -1423,8 +1500,8 @@ private: return false; } - template - bool execute(Block & block, const size_t result, const MultiVersion::Version & dictionary, + template + bool execute(Block & block, const size_t result, const 
DictionaryType * const dictionary, const ColumnConst * const child_id_col, const IColumn * const ancestor_id_col_untyped) { if (const auto ancestor_id_col = typeid_cast *>(ancestor_id_col_untyped)) From 647cd1b0eb6138c739910535ac9cc06448465af8 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 29 Jan 2015 18:47:21 +0300 Subject: [PATCH 16/43] dbms: check dictionary source modification prior to reloading [#METR-13298] Mysql and Clickhouse sources are not currently checked and will always say they are modified. Reload periods are not currently checked. --- .../Dictionaries/ClickhouseDictionarySource.h | 4 ++ .../DB/Dictionaries/DictionaryFactory.h | 7 ++- .../DB/Dictionaries/FileDictionarySource.h | 12 +++- dbms/include/DB/Dictionaries/FlatDictionary.h | 16 ++++-- .../DB/Dictionaries/HashedDictionary.h | 16 ++++-- dbms/include/DB/Dictionaries/IDictionary.h | 13 +++-- .../DB/Dictionaries/IDictionarySource.h | 1 + .../DB/Dictionaries/MysqlDictionarySource.h | 3 + dbms/include/DB/Interpreters/Dictionaries.h | 2 +- dbms/src/Interpreters/Dictionaries.cpp | 55 ++++++++++++++++--- 10 files changed, 101 insertions(+), 28 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index fdb360edc47..89af77993a5 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -61,6 +61,10 @@ private: }; } + /// @todo check update_time with SHOW TABLE STATUS LIKE '%table%' + bool isModified() const override { return true; } + + static std::string composeLoadAllQuery(const Block & block, const std::string & table) { std::string query{"SELECT "}; diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index bb09f534e59..8127de614d8 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -14,7 +14,8 @@ 
namespace DB class DictionaryFactory : public Singleton { public: - DictionaryPtr create(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + DictionaryPtr create(const std::string & name, Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, const Context & context) const { auto dict_struct = DictionaryStructure::fromConfig(config, config_prefix + "structure"); @@ -26,11 +27,11 @@ public: if (config.has(layout_prefix + "flat")) { - return ext::make_unique(dict_struct, config, config_prefix, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, config, config_prefix, std::move(source_ptr)); } else if (config.has(layout_prefix + "hashed")) { - return ext::make_unique(dict_struct, config, config_prefix, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, config, config_prefix, std::move(source_ptr)); } else if (config.has(layout_prefix + "cache")) { diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 1b39db51ba3..42e8baef021 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB { @@ -15,7 +17,9 @@ class FileDictionarySource final : public IDictionarySource public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, const Context & context) - : filename{filename}, format{format}, sample_block{sample_block}, context(context) {} + : filename{filename}, format{format}, sample_block{sample_block}, context(context), + last_modification{getLastModification()} + {} private: BlockInputStreamPtr loadAll() override @@ -23,6 +27,7 @@ private: auto in_ptr = ext::make_unique(filename); auto stream = context.getFormatFactory().getInput( format, *in_ptr, sample_block, max_block_size, context.getDataTypeFactory()); + last_modification = 
getLastModification(); return new OwningBufferBlockInputStream{stream, std::move(in_ptr)}; } @@ -43,10 +48,15 @@ private: }; } + bool isModified() const override { return getLastModification() > last_modification; } + + Poco::Timestamp getLastModification() const { return Poco::File{filename}.getLastModified(); } + const std::string filename; const std::string format; Block sample_block; const Context & context; + Poco::Timestamp last_modification; }; } diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 05f3b4b6f92..c75481fcc09 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -17,9 +17,10 @@ const auto max_array_size = 500000; class FlatDictionary final : public IDictionary { public: - FlatDictionary(const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, + FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr) - : source_ptr{std::move(source_ptr)} + : name{name}, source_ptr{std::move(source_ptr)} { const auto size = dict_struct.attributes.size(); attributes.reserve(size); @@ -50,8 +51,14 @@ public: } } + std::string getName() const override { return name; } + std::string getTypeName() const override { return "FlatDictionary"; } + bool isCached() const override { return false; } + + const IDictionarySource * const getSource() const override { return source_ptr.get(); } + bool hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override @@ -158,8 +165,6 @@ public: DECLARE_UNSAFE_GETTER(StringRef, String, string) #undef DECLARE_UNSAFE_GETTER - bool isComplete() const override { return true; } - struct attribute_t { attribute_type type; @@ -347,11 +352,12 @@ public: }; } + const std::string name; std::map 
attribute_index_by_name; std::vector attributes; const attribute_t * hierarchical_attribute = nullptr; - DictionarySourcePtr source_ptr; + const DictionarySourcePtr source_ptr; }; } diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index a7e8f069409..8145a7d8b83 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -15,9 +15,10 @@ namespace DB class HashedDictionary final : public IDictionary { public: - HashedDictionary(const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, + HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr) - : source_ptr{std::move(source_ptr)} + : name{name}, source_ptr{std::move(source_ptr)} { const auto size = dict_struct.attributes.size(); attributes.reserve(size); @@ -48,8 +49,14 @@ public: } } + std::string getName() const override { return name; } + std::string getTypeName() const override { return "HashedDictionary"; } + bool isCached() const override { return false; } + + const IDictionarySource * const getSource() const override { return source_ptr.get(); } + bool hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override @@ -190,8 +197,6 @@ public: DECLARE_UNSAFE_GETTER(StringRef, String, string) #undef DECLARE_UNSAFE_GETTER - bool isComplete() const override { return true; } - struct attribute_t { attribute_type type; @@ -340,11 +345,12 @@ public: }; } + const std::string name; std::map attribute_index_by_name; std::vector attributes; const attribute_t * hierarchical_attribute = nullptr; - DictionarySourcePtr source_ptr; + const DictionarySourcePtr source_ptr; }; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 
65d424d3ba1..9013eeb8993 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -7,13 +7,22 @@ namespace DB { +class IDictionarySource; + class IDictionary { public: using id_t = std::uint64_t; + virtual std::string getName() const = 0; + virtual std::string getTypeName() const = 0; + virtual bool isCached() const = 0; + virtual void reload() {} + + virtual const IDictionarySource * const getSource() const = 0; + virtual bool hasHierarchy() const = 0; /// do not call unless you ensure that hasHierarchy() returns true @@ -71,10 +80,6 @@ public: virtual Float64 getFloat64Unsafe(std::size_t attribute_idx, id_t id) const = 0; virtual StringRef getStringUnsafe(std::size_t attribute_idx, id_t id) const = 0; - /// entirely-loaded dictionaries should be immutable - virtual bool isComplete() const = 0; - virtual void reload() {} - virtual ~IDictionary() = default; }; diff --git a/dbms/include/DB/Dictionaries/IDictionarySource.h b/dbms/include/DB/Dictionaries/IDictionarySource.h index 526047f6fb9..ecabc800529 100644 --- a/dbms/include/DB/Dictionaries/IDictionarySource.h +++ b/dbms/include/DB/Dictionaries/IDictionarySource.h @@ -12,6 +12,7 @@ public: virtual BlockInputStreamPtr loadAll() = 0; virtual BlockInputStreamPtr loadId(const std::uint64_t id) = 0; virtual BlockInputStreamPtr loadIds(const std::vector ids) = 0; + virtual bool isModified() const = 0; virtual ~IDictionarySource() = default; }; diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index c55b552674a..6cffea85c64 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -47,6 +47,9 @@ private: }; } + /// @todo check update_time with SHOW TABLE STATUS LIKE '%table%' + bool isModified() const override { return true; } + static config_ptr_t getLayeredConfig(Poco::Util::AbstractConfiguration & config) { config_ptr_t 
layered_config{new Poco::Util::LayeredConfiguration}; diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 7ad53a4844e..0ac7544b0e5 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -149,7 +149,7 @@ private: void reloadExternalsPeriodically() { - const auto check_period = 1000; + const auto check_period = 60 * 1000; while (true) { if (destroy.tryWait(check_period)) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 93b46c6ffe5..1ba68a2eaca 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -6,6 +6,26 @@ namespace DB { +namespace +{ + std::string findKeyForDictionary(Poco::Util::XMLConfiguration & config, const std::string & name) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(keys); + + for (const auto & key : keys) + { + if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) + continue; + + if (name == config.getString(key + ".name")) + return key; + } + + return {}; + } +} + void Dictionaries::reloadExternals() { const std::lock_guard lock{externals_mutex}; @@ -48,20 +68,22 @@ void Dictionaries::reloadExternals() auto it = external_dictionaries.find(name); if (it == std::end(external_dictionaries)) { - auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); + /// such a dictionary is not present at the moment + auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); } else { + /// dictionary exists, it may be desirable to reload it auto & current = it->second->get(); - if (current->isComplete()) + if (current->isCached()) + const_cast(current.get())->reload(); + else { /// @todo check that timeout has passed - auto dict_ptr = DictionaryFactory::instance().create(*config, prefix, context); + auto 
dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); it->second->set(dict_ptr.release()); } - else - const_cast(current.get())->reload(); } } catch (const Exception &) @@ -72,18 +94,33 @@ void Dictionaries::reloadExternals() } else { + config_ptr_t config; for (auto & dictionary : external_dictionaries) { try { - auto & current = dictionary.second->get(); - if (current->isComplete()) + auto current = dictionary.second->get(); + if (current->isCached()) { - /// @todo check that timeout has passed and load new version + const_cast(current.get())->reload(); } else { - const_cast(current.get())->reload(); + /// @todo check that timeout has passed and load new version + if (!current->getSource()->isModified()) + continue; + + /// source has supposedly been modified, load it over again + if (!config) + config.reset(new Poco::Util::XMLConfiguration{config_path}); + + const auto & name = current->getName(); + const auto & key = findKeyForDictionary(*config, name); + if (!key.empty()) + { + auto dict_ptr = DictionaryFactory::instance().create(name, *config, key + '.', context); + dictionary.second->set(dict_ptr.release()); + } } } catch (const Exception &) From 9eff84960a678f609faf5a141630e15babbd0981 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 30 Jan 2015 14:51:59 +0300 Subject: [PATCH 17/43] dbms: query table modification time in MysqlDictionarySource [#METR-13298] --- .../DB/Dictionaries/FileDictionarySource.h | 2 +- .../DB/Dictionaries/MysqlDictionarySource.h | 31 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 42e8baef021..6ae84efdecd 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -18,7 +18,7 @@ public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, 
const Context & context) : filename{filename}, format{format}, sample_block{sample_block}, context(context), - last_modification{getLastModification()} + last_modification{getLastModification()} {} private: diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index 6cffea85c64..1e4b00f74e2 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -22,7 +22,8 @@ public: pool{*layered_config_ptr, config_prefix}, sample_block{sample_block}, context(context), table{config.getString(config_prefix + "table")}, - load_all_query{composeLoadAllQuery(sample_block, table)} + load_all_query{composeLoadAllQuery(sample_block, table)}, + last_modification{getLastModification()} {} private: @@ -47,8 +48,29 @@ private: }; } - /// @todo check update_time with SHOW TABLE STATUS LIKE '%table%' - bool isModified() const override { return true; } + bool isModified() const override { return getLastModification() > last_modification; } + + mysqlxx::DateTime getLastModification() const + { + const auto Create_time_idx = 11; + const auto Update_time_idx = 12; + + try + { + auto connection = pool.Get(); + auto query = connection->query("SHOW TABLE STATUS LIKE '%" + table + "%';"); + auto result = query.use(); + auto row = result.fetch(); + const auto & update_time = row[Update_time_idx]; + return !update_time.isNull() ? update_time.getDateTime() : row[Create_time_idx].getDateTime(); + } + catch (...) 
+ { + tryLogCurrentException("MysqlDictionarySource"); + } + + return {}; + } static config_ptr_t getLayeredConfig(Poco::Util::AbstractConfiguration & config) { @@ -77,11 +99,12 @@ private: } const config_ptr_t layered_config_ptr; - mysqlxx::Pool pool; + mutable mysqlxx::Pool pool; Block sample_block; const Context & context; const std::string table; const std::string load_all_query; + mysqlxx::DateTime last_modification; }; } From fb333b16d788313daa7c1e6754821148ebafba56 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 30 Jan 2015 16:43:16 +0300 Subject: [PATCH 18/43] dbms: incapsulate dictionary source data and allow cloning [#METR-13298]; add cloning for dictionaries --- .../Dictionaries/ClickhouseDictionarySource.h | 49 ++++++++---- .../DB/Dictionaries/DictionaryFactory.h | 4 +- .../DB/Dictionaries/FileDictionarySource.h | 10 ++- dbms/include/DB/Dictionaries/FlatDictionary.h | 80 +++++++++++-------- .../DB/Dictionaries/HashedDictionary.h | 80 +++++++++++-------- dbms/include/DB/Dictionaries/IDictionary.h | 6 +- .../DB/Dictionaries/IDictionarySource.h | 7 +- .../DB/Dictionaries/MysqlDictionarySource.h | 46 ++++++----- dbms/src/Interpreters/Dictionaries.cpp | 77 +++++++----------- 9 files changed, 203 insertions(+), 156 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index 89af77993a5..f409186e994 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -1,43 +1,53 @@ #pragma once -#include #include +#include #include -#include -#include #include #include #include +#include +#include #include namespace DB { +const auto max_connections = 1; + class ClickhouseDictionarySource final : public IDictionarySource { static const auto max_block_size = 8192; - static const auto max_connections = 1; public: ClickhouseDictionarySource(Poco::Util::AbstractConfiguration & config, const 
std::string & config_prefix, Block & sample_block, const Context & context) : host{config.getString(config_prefix + "host")}, port(config.getInt(config_prefix + "port")), + user{config.getString(config_prefix + "user", "")}, + password{config.getString(config_prefix + "password", "")}, + db{config.getString(config_prefix + "db", "")}, + table{config.getString(config_prefix + "table")}, + sample_block{sample_block}, context{context}, is_local{isLocal(host, port)}, pool{is_local ? nullptr : ext::make_unique( - max_connections, host, port, - config.getString(config_prefix + "db", ""), - config.getString(config_prefix + "user", ""), - config.getString(config_prefix + "password", ""), - context.getDataTypeFactory(), - "ClickhouseDictionarySource") + max_connections, host, port, db, user, password, context.getDataTypeFactory(), + "ClickhouseDictionarySource") }, - sample_block{sample_block}, context(context), - table{config.getString(config_prefix + "table")}, load_all_query{composeLoadAllQuery(sample_block, table)} {} -private: + ClickhouseDictionarySource(const ClickhouseDictionarySource & other) + : host{other.host}, port{other.port}, user{other.user}, password{other.password}, + db{other.db}, table{other.db}, + sample_block{other.sample_block}, context{other.context}, + is_local{other.is_local}, + pool{is_local ? 
nullptr : ext::make_unique( + max_connections, host, port, db, user, password, context.getDataTypeFactory(), + "ClickhouseDictionarySource")}, + load_all_query{other.load_all_query} + {} + BlockInputStreamPtr loadAll() override { if (is_local) @@ -61,10 +71,12 @@ private: }; } - /// @todo check update_time with SHOW TABLE STATUS LIKE '%table%' + /// @todo check update time somehow bool isModified() const override { return true; } + DictionarySourcePtr clone() const override { return ext::make_unique(*this); } +private: static std::string composeLoadAllQuery(const Block & block, const std::string & table) { std::string query{"SELECT "}; @@ -102,11 +114,14 @@ private: const std::string host; const UInt16 port; - const bool is_local; - std::unique_ptr pool; + const std::string user; + const std::string password; + const std::string db; + const std::string table; Block sample_block; Context context; - const std::string table; + const bool is_local; + std::unique_ptr pool; const std::string load_all_query; }; diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 8127de614d8..b2d28756b2f 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -27,11 +27,11 @@ public: if (config.has(layout_prefix + "flat")) { - return ext::make_unique(name, dict_struct, config, config_prefix, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, std::move(source_ptr)); } else if (config.has(layout_prefix + "hashed")) { - return ext::make_unique(name, dict_struct, config, config_prefix, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, std::move(source_ptr)); } else if (config.has(layout_prefix + "cache")) { diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 6ae84efdecd..aa02d31d8b8 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ 
b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -21,7 +21,12 @@ public: last_modification{getLastModification()} {} -private: + FileDictionarySource(const FileDictionarySource & other) + : filename{other.filename}, format{other.format}, + sample_block{other.sample_block}, context(other.context), + last_modification{other.last_modification} + {} + BlockInputStreamPtr loadAll() override { auto in_ptr = ext::make_unique(filename); @@ -50,6 +55,9 @@ private: bool isModified() const override { return getLastModification() > last_modification; } + DictionarySourcePtr clone() const override { return ext::make_unique(*this); } + +private: Poco::Timestamp getLastModification() const { return Poco::File{filename}.getLastModified(); } const std::string filename; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index c75481fcc09..56464a7f2f0 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -18,45 +18,25 @@ class FlatDictionary final : public IDictionary { public: FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, DictionarySourcePtr source_ptr) - : name{name}, source_ptr{std::move(source_ptr)} + DictionarySourcePtr source_ptr) + : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)} { - const auto size = dict_struct.attributes.size(); - attributes.reserve(size); - for (const auto & attribute : dict_struct.attributes) - { - attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value))); - - if (attribute.hierarchical) - hierarchical_attribute = &attributes.back(); - } - - auto stream = this->source_ptr->loadAll(); - - while (const auto block = stream->read()) - { - const auto & 
id_column = *block.getByPosition(0).column; - - for (const auto attribute_idx : ext::range(0, attributes.size())) - { - const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; - auto & attribute = attributes[attribute_idx]; - - for (const auto row_idx : ext::range(0, id_column.size())) - setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); - } - } + createAttributes(); + loadData(); } + FlatDictionary(const FlatDictionary & other) + : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone()} + {} + std::string getName() const override { return name; } std::string getTypeName() const override { return "FlatDictionary"; } bool isCached() const override { return false; } + DictionaryPtr clone() const override { return ext::make_unique(*this); } + const IDictionarySource * const getSource() const override { return source_ptr.get(); } bool hasHierarchy() const override { return hierarchical_attribute; } @@ -165,6 +145,7 @@ public: DECLARE_UNSAFE_GETTER(StringRef, String, string) #undef DECLARE_UNSAFE_GETTER +private: struct attribute_t { attribute_type type; @@ -193,6 +174,40 @@ public: std::unique_ptr> string_array; }; + void createAttributes() + { + const auto size = dict_struct.attributes.size(); + attributes.reserve(size); + for (const auto & attribute : dict_struct.attributes) + { + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value))); + + if (attribute.hierarchical) + hierarchical_attribute = &attributes.back(); + } + } + + void loadData() + { + auto stream = source_ptr->loadAll(); + + while (const auto block = stream->read()) + { + const auto & id_column = *block.getByPosition(0).column; + + for (const auto attribute_idx : ext::range(0, attributes.size())) + { + const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + auto & attribute 
= attributes[attribute_idx]; + + for (const auto row_idx : ext::range(0, id_column.size())) + setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); + } + } + } + attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) { attribute_t attr{type}; @@ -353,11 +368,12 @@ public: } const std::string name; + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + std::map attribute_index_by_name; std::vector attributes; const attribute_t * hierarchical_attribute = nullptr; - - const DictionarySourcePtr source_ptr; }; } diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 8145a7d8b83..a0733ae77cd 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB { @@ -16,45 +15,25 @@ class HashedDictionary final : public IDictionary { public: HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, DictionarySourcePtr source_ptr) - : name{name}, source_ptr{std::move(source_ptr)} + DictionarySourcePtr source_ptr) + : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)} { - const auto size = dict_struct.attributes.size(); - attributes.reserve(size); - for (const auto & attribute : dict_struct.attributes) - { - attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value))); - - if (attribute.hierarchical) - hierarchical_attribute = &attributes.back(); - } - - auto stream = this->source_ptr->loadAll(); - - while (const auto block = stream->read()) - { - const auto & id_column = *block.getByPosition(0).column; - - for (const auto attribute_idx : 
ext::range(0, attributes.size())) - { - const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; - auto & attribute = attributes[attribute_idx]; - - for (const auto row_idx : ext::range(0, id_column.size())) - setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); - } - } + createAttributes(); + loadData(); } + HashedDictionary(const HashedDictionary & other) + : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone()} + {} + std::string getName() const override { return name; } std::string getTypeName() const override { return "HashedDictionary"; } bool isCached() const override { return false; } + DictionaryPtr clone() const override { return ext::make_unique(*this); } + const IDictionarySource * const getSource() const override { return source_ptr.get(); } bool hasHierarchy() const override { return hierarchical_attribute; } @@ -197,6 +176,7 @@ public: DECLARE_UNSAFE_GETTER(StringRef, String, string) #undef DECLARE_UNSAFE_GETTER +private: struct attribute_t { attribute_type type; @@ -225,6 +205,40 @@ public: std::unique_ptr> string_map; }; + void createAttributes() + { + const auto size = dict_struct.attributes.size(); + attributes.reserve(size); + for (const auto & attribute : dict_struct.attributes) + { + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value))); + + if (attribute.hierarchical) + hierarchical_attribute = &attributes.back(); + } + } + + void loadData() + { + auto stream = source_ptr->loadAll(); + + while (const auto block = stream->read()) + { + const auto & id_column = *block.getByPosition(0).column; + + for (const auto attribute_idx : ext::range(0, attributes.size())) + { + const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + auto & attribute = attributes[attribute_idx]; + + for (const auto row_idx : ext::range(0, 
id_column.size())) + setAttributeValue(attribute, id_column[row_idx].get(), attribute_column[row_idx]); + } + } + } + attribute_t createAttributeWithType(const attribute_type type, const std::string & null_value) { attribute_t attr{type}; @@ -346,11 +360,13 @@ public: } const std::string name; + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + std::map attribute_index_by_name; std::vector attributes; const attribute_t * hierarchical_attribute = nullptr; - const DictionarySourcePtr source_ptr; }; } diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 9013eeb8993..f9459aa5d9b 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -9,6 +9,9 @@ namespace DB class IDictionarySource; +class IDictionary; +using DictionaryPtr = std::unique_ptr; + class IDictionary { public: @@ -20,6 +23,7 @@ public: virtual bool isCached() const = 0; virtual void reload() {} + virtual DictionaryPtr clone() const = 0; virtual const IDictionarySource * const getSource() const = 0; @@ -83,6 +87,4 @@ public: virtual ~IDictionary() = default; }; -using DictionaryPtr = std::unique_ptr; - } diff --git a/dbms/include/DB/Dictionaries/IDictionarySource.h b/dbms/include/DB/Dictionaries/IDictionarySource.h index ecabc800529..50404cc682c 100644 --- a/dbms/include/DB/Dictionaries/IDictionarySource.h +++ b/dbms/include/DB/Dictionaries/IDictionarySource.h @@ -6,6 +6,9 @@ namespace DB { +class IDictionarySource; +using DictionarySourcePtr = std::unique_ptr; + class IDictionarySource { public: @@ -14,9 +17,9 @@ public: virtual BlockInputStreamPtr loadIds(const std::vector ids) = 0; virtual bool isModified() const = 0; + virtual DictionarySourcePtr clone() const = 0; + virtual ~IDictionarySource() = default; }; -using DictionarySourcePtr = std::unique_ptr; - } diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h 
b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index 1e4b00f74e2..32ec45feea9 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -1,12 +1,11 @@ #pragma once -#include -#include #include -#include +#include +#include #include #include -#include +#include namespace DB { @@ -18,15 +17,26 @@ class MysqlDictionarySource final : public IDictionarySource public: MysqlDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, const Context & context) - : layered_config_ptr{getLayeredConfig(config)}, - pool{*layered_config_ptr, config_prefix}, - sample_block{sample_block}, context(context), + : host{config.getString(config_prefix + "host")}, + port(config.getInt(config_prefix + "port")), + user{config.getString(config_prefix + "user", "")}, + password{config.getString(config_prefix + "password", "")}, + db{config.getString(config_prefix + "db", "")}, table{config.getString(config_prefix + "table")}, + sample_block{sample_block}, context(context), + pool{db, host, user, password, port}, load_all_query{composeLoadAllQuery(sample_block, table)}, last_modification{getLastModification()} {} -private: + MysqlDictionarySource(const MysqlDictionarySource & other) + : host{other.host}, port{other.port}, user{other.user}, password{other.password}, + db{other.db}, table{other.db}, + sample_block{other.sample_block}, context(other.context), + pool{db, host, user, password, port}, + load_all_query{other.load_all_query}, last_modification{other.last_modification} + {} + BlockInputStreamPtr loadAll() override { return new MysqlBlockInputStream{pool.Get()->query(load_all_query), sample_block, max_block_size}; @@ -50,6 +60,9 @@ private: bool isModified() const override { return getLastModification() > last_modification; } + DictionarySourcePtr clone() const override { return ext::make_unique(*this); } + +private: mysqlxx::DateTime 
getLastModification() const { const auto Create_time_idx = 11; @@ -72,13 +85,6 @@ private: return {}; } - static config_ptr_t getLayeredConfig(Poco::Util::AbstractConfiguration & config) - { - config_ptr_t layered_config{new Poco::Util::LayeredConfiguration}; - layered_config->add(&config); - return layered_config; - } - static std::string composeLoadAllQuery(const Block & block, const std::string & table) { std::string query{"SELECT "}; @@ -98,11 +104,15 @@ private: return query; } - const config_ptr_t layered_config_ptr; - mutable mysqlxx::Pool pool; + const std::string host; + const UInt16 port; + const std::string user; + const std::string password; + const std::string db; + const std::string table; Block sample_block; const Context & context; - const std::string table; + mutable mysqlxx::Pool pool; const std::string load_all_query; mysqlxx::DateTime last_modification; }; diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 1ba68a2eaca..c6d35f4148c 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -2,30 +2,9 @@ #include #include - namespace DB { -namespace -{ - std::string findKeyForDictionary(Poco::Util::XMLConfiguration & config, const std::string & name) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(keys); - - for (const auto & key : keys) - { - if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) - continue; - - if (name == config.getString(key + ".name")) - return key; - } - - return {}; - } -} - void Dictionaries::reloadExternals() { const std::lock_guard lock{externals_mutex}; @@ -48,23 +27,31 @@ void Dictionaries::reloadExternals() /// for each dictionary defined in xml config for (const auto & key : keys) { - if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) - { - LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); - continue; - } - - const auto & prefix = key + '.'; - - const 
auto & name = config->getString(prefix + "name"); - if (name.empty()) - { - LOG_WARNING(log, "dictionary name cannot be empty"); - continue; - } - try { + if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) + { + LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); + continue; + } + + const auto & prefix = key + '.'; + + const auto & name = config->getString(prefix + "name"); + if (name.empty()) + { + LOG_WARNING(log, "dictionary name cannot be empty"); + continue; + } + + const auto & lifetime_key = prefix + "lifetime"; + const auto & lifetime_min_key = lifetime_key + ".min"; + const auto has_min = config->has(lifetime_min_key); + const auto min_update_time = has_min ? config->getInt(lifetime_min_key) : config->getInt(lifetime_key); + const auto max_update_time = has_min ? config->getInt(lifetime_key + ".max") : min_update_time; + + std::cout << "min_update_time = " << min_update_time << " max_update_time = " << max_update_time << std::endl; + auto it = external_dictionaries.find(name); if (it == std::end(external_dictionaries)) { @@ -86,7 +73,7 @@ void Dictionaries::reloadExternals() } } } - catch (const Exception &) + catch (...) 
{ handleException(); } @@ -94,7 +81,6 @@ void Dictionaries::reloadExternals() } else { - config_ptr_t config; for (auto & dictionary : external_dictionaries) { try @@ -110,20 +96,11 @@ void Dictionaries::reloadExternals() if (!current->getSource()->isModified()) continue; - /// source has supposedly been modified, load it over again - if (!config) - config.reset(new Poco::Util::XMLConfiguration{config_path}); - - const auto & name = current->getName(); - const auto & key = findKeyForDictionary(*config, name); - if (!key.empty()) - { - auto dict_ptr = DictionaryFactory::instance().create(name, *config, key + '.', context); - dictionary.second->set(dict_ptr.release()); - } + auto new_version = current->clone(); + dictionary.second->set(new_version.release()); } } - catch (const Exception &) + catch (...) { handleException(); } From 345afb605930f995538a7c6ba3ad6e331e26dc26 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 30 Jan 2015 18:18:13 +0300 Subject: [PATCH 19/43] dbms: store dictionary lifetime in dictionaries [#METR-13298] --- .../DB/Dictionaries/DictionaryFactory.h | 6 ++-- .../DB/Dictionaries/DictionaryStructure.h | 15 +++++++++ dbms/include/DB/Dictionaries/FlatDictionary.h | 10 ++++-- .../DB/Dictionaries/HashedDictionary.h | 10 ++++-- dbms/include/DB/Dictionaries/IDictionary.h | 4 +++ dbms/src/Interpreters/Dictionaries.cpp | 31 +++---------------- 6 files changed, 42 insertions(+), 34 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index b2d28756b2f..8dcbbe2808d 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -23,15 +23,17 @@ public: auto source_ptr = DictionarySourceFactory::instance().create( config, config_prefix + "source.", dict_struct, context); + const auto dict_lifetime = DictionaryLifetime::fromConfig(config, config_prefix + "lifetime"); + const auto & layout_prefix = config_prefix + 
"layout."; if (config.has(layout_prefix + "flat")) { - return ext::make_unique(name, dict_struct, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); } else if (config.has(layout_prefix + "hashed")) { - return ext::make_unique(name, dict_struct, std::move(source_ptr)); + return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); } else if (config.has(layout_prefix + "cache")) { diff --git a/dbms/include/DB/Dictionaries/DictionaryStructure.h b/dbms/include/DB/Dictionaries/DictionaryStructure.h index 27c3e7721bc..cd552784551 100644 --- a/dbms/include/DB/Dictionaries/DictionaryStructure.h +++ b/dbms/include/DB/Dictionaries/DictionaryStructure.h @@ -73,6 +73,21 @@ inline std::string toString(const attribute_type type) }; } +struct DictionaryLifetime +{ + std::uint64_t min_sec; + std::uint64_t max_sec; + + static DictionaryLifetime fromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) + { + const auto & lifetime_min_key = config_prefix + ".min"; + const auto has_min = config.has(lifetime_min_key); + const std::uint64_t min_update_time = has_min ? config.getInt(lifetime_min_key) : config.getInt(config_prefix); + const std::uint64_t max_update_time = has_min ? 
config.getInt(config_prefix + ".max") : min_update_time; + return { min_update_time, max_update_time }; + } +}; + struct DictionaryAttribute { std::string name; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 56464a7f2f0..c0604ccd701 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -18,15 +18,16 @@ class FlatDictionary final : public IDictionary { public: FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)} + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + : name{name}, dict_struct(dict_struct), + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) { createAttributes(); loadData(); } FlatDictionary(const FlatDictionary & other) - : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone()} + : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} {} std::string getName() const override { return name; } @@ -39,6 +40,8 @@ public: const IDictionarySource * const getSource() const override { return source_ptr.get(); } + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + bool hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override @@ -370,6 +373,7 @@ private: const std::string name; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index a0733ae77cd..4c83f6395ca 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -15,15 +15,16 @@ 
class HashedDictionary final : public IDictionary { public: HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)} + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + : name{name}, dict_struct(dict_struct), + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) { createAttributes(); loadData(); } HashedDictionary(const HashedDictionary & other) - : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone()} + : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} {} std::string getName() const override { return name; } @@ -36,6 +37,8 @@ public: const IDictionarySource * const getSource() const override { return source_ptr.get(); } + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + bool hasHierarchy() const override { return hierarchical_attribute; } id_t toParent(const id_t id) const override @@ -362,6 +365,7 @@ private: const std::string name; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index f9459aa5d9b..2e99e59ce43 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -12,6 +12,8 @@ class IDictionarySource; class IDictionary; using DictionaryPtr = std::unique_ptr; +class DictionaryLifetime; + class IDictionary { public: @@ -27,6 +29,8 @@ public: virtual const IDictionarySource * const getSource() const = 0; + virtual const DictionaryLifetime & getLifetime() const = 0; + virtual bool hasHierarchy() const = 0; /// do not call unless you ensure that hasHierarchy() returns true diff --git 
a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index c6d35f4148c..4c504ebf657 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -16,6 +16,7 @@ void Dictionaries::reloadExternals() const auto last_modified = Poco::File{config_path}.getLastModified(); if (last_modified > dictionaries_last_modified) { + /// definitions of dictionaries may have changed, recreate all of them dictionaries_last_modified = last_modified; const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; @@ -44,34 +45,13 @@ void Dictionaries::reloadExternals() continue; } - const auto & lifetime_key = prefix + "lifetime"; - const auto & lifetime_min_key = lifetime_key + ".min"; - const auto has_min = config->has(lifetime_min_key); - const auto min_update_time = has_min ? config->getInt(lifetime_min_key) : config->getInt(lifetime_key); - const auto max_update_time = has_min ? config->getInt(lifetime_key + ".max") : min_update_time; - - std::cout << "min_update_time = " << min_update_time << " max_update_time = " << max_update_time << std::endl; - + auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); auto it = external_dictionaries.find(name); + /// add new dictionary or update an existing version if (it == std::end(external_dictionaries)) - { - /// such a dictionary is not present at the moment - auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); - } else - { - /// dictionary exists, it may be desirable to reload it - auto & current = it->second->get(); - if (current->isCached()) - const_cast(current.get())->reload(); - else - { - /// @todo check that timeout has passed - auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); - it->second->set(dict_ptr.release()); - } - } + it->second->set(dict_ptr.release()); } catch (...) 
{ @@ -81,15 +61,14 @@ void Dictionaries::reloadExternals() } else { + /// periodic update for (auto & dictionary : external_dictionaries) { try { auto current = dictionary.second->get(); if (current->isCached()) - { const_cast(current.get())->reload(); - } else { /// @todo check that timeout has passed and load new version From fbdfa3df80cbe6b682fa41862681250fb1746639 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 3 Feb 2015 14:36:07 +0300 Subject: [PATCH 20/43] dbms: proper reloading for non-cached dictionaries [#METR-13298] --- dbms/include/DB/Interpreters/Dictionaries.h | 6 +++- dbms/src/Interpreters/Dictionaries.cpp | 33 ++++++++++++++++----- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 0ac7544b0e5..08ea189a155 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -33,6 +35,8 @@ private: MultiVersion categories_hierarchy; MultiVersion regions_names; std::unordered_map>> external_dictionaries; + std::unordered_map update_times; + std::mt19937 rnd_engine; const Context & context; /// Периодичность обновления справочников, в секундах. 
@@ -149,7 +153,7 @@ private: void reloadExternalsPeriodically() { - const auto check_period = 60 * 1000; + const auto check_period = 5 * 1000; while (true) { if (destroy.tryWait(check_period)) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 4c504ebf657..f9eb89a095c 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -46,6 +46,14 @@ void Dictionaries::reloadExternals() } auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); + if (!dict_ptr->isCached()) + { + const auto & lifetime = dict_ptr->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_times[name] = std::chrono::system_clock::now() + + std::chrono::seconds{distribution(rnd_engine)}; + } + auto it = external_dictionaries.find(name); /// add new dictionary or update an existing version if (it == std::end(external_dictionaries)) @@ -67,16 +75,27 @@ void Dictionaries::reloadExternals() try { auto current = dictionary.second->get(); - if (current->isCached()) - const_cast(current.get())->reload(); - else + /// update only non-cached dictionaries + if (!current->isCached()) { - /// @todo check that timeout has passed and load new version - if (!current->getSource()->isModified()) + auto & update_time = update_times[current->getName()]; + + /// check that timeout has passed + if (std::chrono::system_clock::now() < update_time) continue; - auto new_version = current->clone(); - dictionary.second->set(new_version.release()); + /// check source modified + if (current->getSource()->isModified()) + { + /// create new version of dictionary + auto new_version = current->clone(); + dictionary.second->set(new_version.release()); + } + + /// calculate next update time + const auto & lifetime = current->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_time = 
std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; } } catch (...) From 409dc6ff1b20b35a552a27a24e77537b7e7a329c Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 3 Feb 2015 14:36:07 +0300 Subject: [PATCH 21/43] dbms: proper reloading for non-cached dictionaries [#METR-13298] --- dbms/include/DB/Interpreters/Dictionaries.h | 9 +++-- dbms/src/Interpreters/Dictionaries.cpp | 38 ++++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 0ac7544b0e5..d8b72394251 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -32,7 +34,10 @@ private: MultiVersion tech_data_hierarchy; MultiVersion categories_hierarchy; MultiVersion regions_names; + mutable std::mutex external_dictionaries_mutex; std::unordered_map>> external_dictionaries; + std::unordered_map update_times; + std::mt19937 rnd_engine; const Context & context; /// Периодичность обновления справочников, в секундах. 
@@ -44,7 +49,6 @@ private: Logger * log; - std::mutex externals_mutex; Poco::Timestamp dictionaries_last_modified{0}; @@ -149,7 +153,7 @@ private: void reloadExternalsPeriodically() { - const auto check_period = 60 * 1000; + const auto check_period = 5 * 1000; while (true) { if (destroy.tryWait(check_period)) @@ -200,6 +204,7 @@ public: MultiVersion::Version getExternalDictionary(const std::string & name) const { + const std::lock_guard lock{external_dictionaries_mutex}; const auto it = external_dictionaries.find(name); if (it == std::end(external_dictionaries)) throw Exception{ diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 4c504ebf657..fd3b2385a46 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -7,8 +7,6 @@ namespace DB void Dictionaries::reloadExternals() { - const std::lock_guard lock{externals_mutex}; - const auto config_path = Poco::Util::Application::instance().config().getString("dictionaries_config"); if (config_path.empty()) return; @@ -46,10 +44,21 @@ void Dictionaries::reloadExternals() } auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); + if (!dict_ptr->isCached()) + { + const auto & lifetime = dict_ptr->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_times[name] = std::chrono::system_clock::now() + + std::chrono::seconds{distribution(rnd_engine)}; + } + auto it = external_dictionaries.find(name); /// add new dictionary or update an existing version if (it == std::end(external_dictionaries)) + { + const std::lock_guard lock{external_dictionaries_mutex}; external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + } else it->second->set(dict_ptr.release()); } @@ -67,16 +76,27 @@ void Dictionaries::reloadExternals() try { auto current = dictionary.second->get(); - if (current->isCached()) - const_cast(current.get())->reload(); - else + /// 
update only non-cached dictionaries + if (!current->isCached()) { - /// @todo check that timeout has passed and load new version - if (!current->getSource()->isModified()) + auto & update_time = update_times[current->getName()]; + + /// check that timeout has passed + if (std::chrono::system_clock::now() < update_time) continue; - auto new_version = current->clone(); - dictionary.second->set(new_version.release()); + /// check source modified + if (current->getSource()->isModified()) + { + /// create new version of dictionary + auto new_version = current->clone(); + dictionary.second->set(new_version.release()); + } + + /// calculate next update time + const auto & lifetime = current->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; } } catch (...) From 988016bd447207c675d68053d02f8aba6dbc2ced Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 3 Feb 2015 14:57:52 +0300 Subject: [PATCH 22/43] dbms: revert changes to mysqlxx::Pool[#METR-13298] --- libs/libmysqlxx/include/mysqlxx/Pool.h | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/libs/libmysqlxx/include/mysqlxx/Pool.h b/libs/libmysqlxx/include/mysqlxx/Pool.h index 03957b47f2d..b3821215274 100644 --- a/libs/libmysqlxx/include/mysqlxx/Pool.h +++ b/libs/libmysqlxx/include/mysqlxx/Pool.h @@ -197,30 +197,14 @@ public: * @param max_connections_ Максимальное количество подключений */ Pool(const std::string & config_name, - unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, - unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, - const char * parent_config_name_ = nullptr) - : Pool{ - Poco::Util::Application::instance().config(), config_name, - default_connections_, max_connections_, parent_config_name_ - } - {} - - - /** - * @param cfg Конфигурация - * @param config_name Имя параметра в 
конфигурационном файле - * @param default_connections_ Количество подключений по-умолчанию - * @param max_connections_ Максимальное количество подключений - */ - Pool(Poco::Util::LayeredConfiguration & cfg, - const std::string & config_name, - unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, - unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, - const char * parent_config_name_ = nullptr) + unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, + unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, + const char * parent_config_name_ = nullptr) : default_connections(default_connections_), max_connections(max_connections_), initialized(false), was_successful(false) { + Poco::Util::LayeredConfiguration & cfg = Poco::Util::Application::instance().config(); + server = cfg.getString(config_name + ".host"); if (parent_config_name_) From 6fecb60894cb8ed308d6e8d1b6d97f8fb9e3e152 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 3 Feb 2015 20:03:35 +0300 Subject: [PATCH 23/43] dbms: better decoupling [#METR-13298] --- .../Dictionaries/ClickhouseDictionarySource.h | 9 ++-- .../DB/Dictionaries/DictionaryFactory.h | 47 ++--------------- .../DB/Dictionaries/DictionarySourceFactory.h | 6 +-- .../DB/Dictionaries/FileDictionarySource.h | 4 +- dbms/include/DB/Dictionaries/IDictionary.h | 1 + dbms/include/DB/Interpreters/Dictionaries.h | 7 ++- dbms/src/Interpreters/Dictionaries.cpp | 2 + dbms/src/Interpreters/DictionaryFactory.cpp | 50 +++++++++++++++++++ 8 files changed, 69 insertions(+), 57 deletions(-) create mode 100644 dbms/src/Interpreters/DictionaryFactory.cpp diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index f409186e994..badd32057a1 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -1,7 +1,6 @@ #pragma once #include -#include 
#include #include #include @@ -21,14 +20,14 @@ class ClickhouseDictionarySource final : public IDictionarySource public: ClickhouseDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Block & sample_block, const Context & context) + Block & sample_block, Context & context) : host{config.getString(config_prefix + "host")}, port(config.getInt(config_prefix + "port")), user{config.getString(config_prefix + "user", "")}, password{config.getString(config_prefix + "password", "")}, db{config.getString(config_prefix + "db", "")}, table{config.getString(config_prefix + "table")}, - sample_block{sample_block}, context{context}, + sample_block{sample_block}, context(context), is_local{isLocal(host, port)}, pool{is_local ? nullptr : ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), @@ -40,7 +39,7 @@ public: ClickhouseDictionarySource(const ClickhouseDictionarySource & other) : host{other.host}, port{other.port}, user{other.user}, password{other.password}, db{other.db}, table{other.db}, - sample_block{other.sample_block}, context{other.context}, + sample_block{other.sample_block}, context(other.context), is_local{other.is_local}, pool{is_local ? 
nullptr : ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), @@ -119,7 +118,7 @@ private: const std::string db; const std::string table; Block sample_block; - Context context; + Context & context; const bool is_local; std::unique_ptr pool; const std::string load_all_query; diff --git a/dbms/include/DB/Dictionaries/DictionaryFactory.h b/dbms/include/DB/Dictionaries/DictionaryFactory.h index 8dcbbe2808d..a46d9a41e8b 100644 --- a/dbms/include/DB/Dictionaries/DictionaryFactory.h +++ b/dbms/include/DB/Dictionaries/DictionaryFactory.h @@ -1,57 +1,18 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include #include namespace DB { +class Context; + class DictionaryFactory : public Singleton { public: DictionaryPtr create(const std::string & name, Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - const Context & context) const - { - auto dict_struct = DictionaryStructure::fromConfig(config, config_prefix + "structure"); - - auto source_ptr = DictionarySourceFactory::instance().create( - config, config_prefix + "source.", dict_struct, context); - - const auto dict_lifetime = DictionaryLifetime::fromConfig(config, config_prefix + "lifetime"); - - const auto & layout_prefix = config_prefix + "layout."; - - if (config.has(layout_prefix + "flat")) - { - return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); - } - else if (config.has(layout_prefix + "hashed")) - { - return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); - } - else if (config.has(layout_prefix + "cache")) - { - const auto size = config.getInt(layout_prefix + "cache.size", 0); - if (size == 0) - throw Exception{ - "Dictionary of type 'cache' cannot have size of 0 bytes", - ErrorCodes::TOO_SMALL_BUFFER_SIZE - }; - - throw Exception{ - "Dictionary of type 'cache' is not yet implemented", - ErrorCodes::NOT_IMPLEMENTED - }; - } - - throw Exception{"No 
dictionary type specified", ErrorCodes::BAD_ARGUMENTS}; - } + const std::string & config_prefix, Context & context) const; }; } diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index aa322bcc20e..b6678d11b57 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -44,7 +44,7 @@ public: DictionarySourcePtr create(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct, - const Context & context) const + Context & context) const { auto sample_block = createSampleBlock(dict_struct, context); @@ -60,8 +60,8 @@ public: } else if (config.has(config_prefix + "clickhouse")) { - return ext::make_unique(config, config_prefix + "clickhouse.", - sample_block, context); + return nullptr;//ext::make_unique(config, config_prefix + "clickhouse.", + //sample_block, context); } throw Exception{"unsupported source type"}; diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index aa02d31d8b8..ac90f103727 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -16,7 +16,7 @@ class FileDictionarySource final : public IDictionarySource public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, - const Context & context) + Context & context) : filename{filename}, format{format}, sample_block{sample_block}, context(context), last_modification{getLastModification()} {} @@ -63,7 +63,7 @@ private: const std::string filename; const std::string format; Block sample_block; - const Context & context; + Context & context; Poco::Timestamp last_modification; }; diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index 2e99e59ce43..088c9901912 100644 --- 
a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index d8b72394251..0af34787b0e 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -15,8 +15,6 @@ #include #include -#include - namespace DB { @@ -24,6 +22,7 @@ namespace DB using Poco::SharedPtr; class Context; +class IDictionary; /// Словари Метрики, которые могут использоваться в функциях. @@ -39,7 +38,7 @@ private: std::unordered_map update_times; std::mt19937 rnd_engine; - const Context & context; + Context & context; /// Периодичность обновления справочников, в секундах. int reload_period; @@ -165,7 +164,7 @@ private: public: /// Справочники будут обновляться в отдельном потоке, каждые reload_period секунд. - Dictionaries(const Context & context, int reload_period_ = 3600) + Dictionaries(Context & context, int reload_period_ = 3600) : context(context), reload_period(reload_period_), log(&Logger::get("Dictionaries")) { diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index fd3b2385a46..d93d0bd49c4 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include namespace DB diff --git a/dbms/src/Interpreters/DictionaryFactory.cpp b/dbms/src/Interpreters/DictionaryFactory.cpp new file mode 100644 index 00000000000..2843a31d08d --- /dev/null +++ b/dbms/src/Interpreters/DictionaryFactory.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, Context & context) const +{ + auto dict_struct = 
DictionaryStructure::fromConfig(config, config_prefix + "structure"); + + auto source_ptr = DictionarySourceFactory::instance().create( + config, config_prefix + "source.", dict_struct, context); + + const auto dict_lifetime = DictionaryLifetime::fromConfig(config, config_prefix + "lifetime"); + + const auto & layout_prefix = config_prefix + "layout."; + + if (config.has(layout_prefix + "flat")) + { + return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + } + else if (config.has(layout_prefix + "hashed")) + { + return ext::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + } + else if (config.has(layout_prefix + "cache")) + { + const auto size = config.getInt(layout_prefix + "cache.size", 0); + if (size == 0) + throw Exception{ + "Dictionary of type 'cache' cannot have size of 0 bytes", + ErrorCodes::TOO_SMALL_BUFFER_SIZE + }; + + throw Exception{ + "Dictionary of type 'cache' is not yet implemented", + ErrorCodes::NOT_IMPLEMENTED + }; + } + + throw Exception{"No dictionary type specified", ErrorCodes::BAD_ARGUMENTS}; +}; + +} From 496fdc02287e38f0bf0fbc83b947b4d8e9a9c009 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 3 Feb 2015 20:14:12 +0300 Subject: [PATCH 24/43] dbms: add safety to Context::getDictionaries [#METR-13298] --- dbms/src/Interpreters/Context.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index cdb6f53b287..5f3ee65a6d5 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -496,7 +496,11 @@ const Dictionaries & Context::getDictionaries() const Poco::ScopedLock lock(shared->mutex); if (!shared->dictionaries) - shared->dictionaries = new Dictionaries{*this->global_context}; + { + if (!this->global_context) + throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); + shared->dictionaries = new Dictionaries{ *this->global_context }; 
+ } return *shared->dictionaries; } From ad54825dfe1e442c6dd1fb044911e0f2d2c3d69b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 16:06:56 +0300 Subject: [PATCH 25/43] dbms: uncomment clickhouse dictionary source creation [#METR-13298] --- .../DB/Dictionaries/DictionarySourceFactory.h | 4 +-- dbms/src/Interpreters/Dictionaries.cpp | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index b6678d11b57..e153e3bb21d 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -60,8 +60,8 @@ public: } else if (config.has(config_prefix + "clickhouse")) { - return nullptr;//ext::make_unique(config, config_prefix + "clickhouse.", - //sample_block, context); + ext::make_unique(config, config_prefix + "clickhouse.", + sample_block, context); } throw Exception{"unsupported source type"}; diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index d93d0bd49c4..2fb20c68019 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -7,11 +7,30 @@ namespace DB { +namespace +{ + std::string getDictionariesConfigPath(const Poco::Util::AbstractConfiguration & config) + { + const auto path = config.getString("dictionaries_config"); + if (path.empty()) + return path; + + if (path[0] != '/') + { + const auto app_config_path = config.getString("config-file", "config.xml"); + const auto config_dir = Poco::Path{app_config_path}.parent().toString(); + const auto absolute_path = config_dir + path; + if (Poco::File{absolute_path}.exists()) + return absolute_path; + } + + return path; + } +} + void Dictionaries::reloadExternals() { - const auto config_path = Poco::Util::Application::instance().config().getString("dictionaries_config"); - if (config_path.empty()) - return; + const 
auto config_path = getDictionariesConfigPath(Poco::Util::Application::instance().config()); const auto last_modified = Poco::File{config_path}.getLastModified(); if (last_modified > dictionaries_last_modified) From f591ba73e16c11a95ad62cd2175778984616ef90 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 16:21:50 +0300 Subject: [PATCH 26/43] dbms: check that dictionaries config file exists [#METR-13298] --- dbms/src/Interpreters/Dictionaries.cpp | 162 +++++++++++++------------ 1 file changed, 84 insertions(+), 78 deletions(-) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 2fb20c68019..5bf1bd2e2d5 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -31,100 +31,106 @@ namespace void Dictionaries::reloadExternals() { const auto config_path = getDictionariesConfigPath(Poco::Util::Application::instance().config()); + const Poco::File config_file{config_path}; - const auto last_modified = Poco::File{config_path}.getLastModified(); - if (last_modified > dictionaries_last_modified) + if (!config_file.exists()) { - /// definitions of dictionaries may have changed, recreate all of them - dictionaries_last_modified = last_modified; - - const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; - - /// get all dictionaries' definitions - Poco::Util::AbstractConfiguration::Keys keys; - config->keys(keys); - - /// for each dictionary defined in xml config - for (const auto & key : keys) - { - try - { - if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) - { - LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); - continue; - } - - const auto & prefix = key + '.'; - - const auto & name = config->getString(prefix + "name"); - if (name.empty()) - { - LOG_WARNING(log, "dictionary name cannot be empty"); - continue; - } - - auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); 
- if (!dict_ptr->isCached()) - { - const auto & lifetime = dict_ptr->getLifetime(); - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - update_times[name] = std::chrono::system_clock::now() + - std::chrono::seconds{distribution(rnd_engine)}; - } - - auto it = external_dictionaries.find(name); - /// add new dictionary or update an existing version - if (it == std::end(external_dictionaries)) - { - const std::lock_guard lock{external_dictionaries_mutex}; - external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); - } - else - it->second->set(dict_ptr.release()); - } - catch (...) - { - handleException(); - } - } + LOG_WARNING(log, "config file '" + config_path + "' does not exist"); } else { - /// periodic update - for (auto & dictionary : external_dictionaries) + const auto last_modified = config_file.getLastModified(); + if (last_modified > dictionaries_last_modified) { - try + /// definitions of dictionaries may have changed, recreate all of them + dictionaries_last_modified = last_modified; + + const config_ptr_t config{new Poco::Util::XMLConfiguration{config_path}}; + + /// get all dictionaries' definitions + Poco::Util::AbstractConfiguration::Keys keys; + config->keys(keys); + + /// for each dictionary defined in xml config + for (const auto & key : keys) { - auto current = dictionary.second->get(); - /// update only non-cached dictionaries - if (!current->isCached()) + try { - auto & update_time = update_times[current->getName()]; - - /// check that timeout has passed - if (std::chrono::system_clock::now() < update_time) - continue; - - /// check source modified - if (current->getSource()->isModified()) + if (0 != strncmp(key.data(), "dictionary", strlen("dictionary"))) { - /// create new version of dictionary - auto new_version = current->clone(); - dictionary.second->set(new_version.release()); + LOG_WARNING(log, "unknown node in dictionaries file: '" + key + "', 'dictionary'"); + continue; } - /// calculate 
next update time - const auto & lifetime = current->getLifetime(); - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + const auto & prefix = key + '.'; + + const auto & name = config->getString(prefix + "name"); + if (name.empty()) + { + LOG_WARNING(log, "dictionary name cannot be empty"); + continue; + } + + auto dict_ptr = DictionaryFactory::instance().create(name, *config, prefix, context); + if (!dict_ptr->isCached()) + { + const auto & lifetime = dict_ptr->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_times[name] = std::chrono::system_clock::now() + + std::chrono::seconds{ distribution(rnd_engine) }; + } + + auto it = external_dictionaries.find(name); + /// add new dictionary or update an existing version + if (it == std::end(external_dictionaries)) + { + const std::lock_guard lock{external_dictionaries_mutex}; + external_dictionaries.emplace(name, std::make_shared>(dict_ptr.release())); + } + else + it->second->set(dict_ptr.release()); + } + catch (...) + { + handleException(); } } - catch (...) 
+ } + } + + /// periodic update + for (auto & dictionary : external_dictionaries) + { + try + { + auto current = dictionary.second->get(); + /// update only non-cached dictionaries + if (!current->isCached()) { - handleException(); + auto & update_time = update_times[current->getName()]; + + /// check that timeout has passed + if (std::chrono::system_clock::now() < update_time) + continue; + + /// check source modified + if (current->getSource()->isModified()) + { + /// create new version of dictionary + auto new_version = current->clone(); + dictionary.second->set(new_version.release()); + } + + /// calculate next update time + const auto & lifetime = current->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; } } + catch (...) + { + handleException(); + } } } From 83f17692e15c2ed29bcfa8da701eef387b4eaa2f Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 16:47:30 +0300 Subject: [PATCH 27/43] dbms: send requests to local clickhouse via network [#METR-13298] --- dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index badd32057a1..5880ce8617a 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -3,11 +3,10 @@ #include #include #include -#include -#include #include #include #include +#include namespace DB { @@ -50,7 +49,9 @@ public: BlockInputStreamPtr loadAll() override { if (is_local) - return executeQuery(load_all_query, context).in; + return new RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; + /// should be processed locally but due to some coupling problems cannot be handled properly now + /// return 
executeQuery(load_all_query, context).in; return new RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; } From 1ef9c4138ae00c5e5b5976d2a83351eb09c6c68e Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 17:29:02 +0300 Subject: [PATCH 28/43] dbms: allow empty dictionaries_config element in config.xml [#METR-13298] --- dbms/src/Interpreters/Dictionaries.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 5bf1bd2e2d5..589190d286c 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -11,7 +11,7 @@ namespace { std::string getDictionariesConfigPath(const Poco::Util::AbstractConfiguration & config) { - const auto path = config.getString("dictionaries_config"); + const auto path = config.getString("dictionaries_config", ""); if (path.empty()) return path; From 7127c21a4282178629151e9aea0a744e56605443 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 18:18:29 +0300 Subject: [PATCH 29/43] dbms: add forgotten return, support non-reloaded dictionaries [#METR-13298] --- .../DB/Dictionaries/DictionarySourceFactory.h | 2 +- dbms/src/Interpreters/Dictionaries.cpp | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index e153e3bb21d..d18dd53e218 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -60,7 +60,7 @@ public: } else if (config.has(config_prefix + "clickhouse")) { - ext::make_unique(config, config_prefix + "clickhouse.", + return ext::make_unique(config, config_prefix + "clickhouse.", sample_block, context); } diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index 589190d286c..ee83289af47 100644 --- 
a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -75,9 +75,15 @@ void Dictionaries::reloadExternals() if (!dict_ptr->isCached()) { const auto & lifetime = dict_ptr->getLifetime(); - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - update_times[name] = std::chrono::system_clock::now() + - std::chrono::seconds{ distribution(rnd_engine) }; + if (lifetime.min_sec != 0 && lifetime.max_sec != 0) + { + std::uniform_int_distribution distribution{ + lifetime.min_sec, + lifetime.max_sec + }; + update_times[name] = std::chrono::system_clock::now() + + std::chrono::seconds{distribution(rnd_engine)}; + } } auto it = external_dictionaries.find(name); @@ -104,6 +110,12 @@ void Dictionaries::reloadExternals() try { auto current = dictionary.second->get(); + const auto & lifetime = current->getLifetime(); + + /// do not update dictionaries with zero as lifetime + if (lifetime.min_sec == 0 || lifetime.max_sec == 0) + continue; + /// update only non-cached dictionaries if (!current->isCached()) { @@ -122,7 +134,6 @@ void Dictionaries::reloadExternals() } /// calculate next update time - const auto & lifetime = current->getLifetime(); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; } From 0b2e0ce0c1d9ddeba719197ec942e610460e0acb Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 19:07:19 +0300 Subject: [PATCH 30/43] dbms: heal ClickhouseDictionarySource for local clickhouse [#METR-13298] --- dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index 5880ce8617a..a74092ef20b 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ 
b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -28,7 +28,7 @@ public: table{config.getString(config_prefix + "table")}, sample_block{sample_block}, context(context), is_local{isLocal(host, port)}, - pool{is_local ? nullptr : ext::make_unique( + pool{/*is_local ? nullptr : */ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), "ClickhouseDictionarySource") }, @@ -40,7 +40,7 @@ public: db{other.db}, table{other.db}, sample_block{other.sample_block}, context(other.context), is_local{other.is_local}, - pool{is_local ? nullptr : ext::make_unique( + pool{/*is_local ? nullptr : */ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), "ClickhouseDictionarySource")}, load_all_query{other.load_all_query} From 835c8a317e7855b2eb0f612cf1daa92d2d33eba1 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 4 Feb 2015 19:10:27 +0300 Subject: [PATCH 31/43] dbms: prohibit local clickhouse as a dictionary source [#METR-13298] --- .../DB/Dictionaries/ClickhouseDictionarySource.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index a74092ef20b..48c05e63c36 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -28,19 +28,25 @@ public: table{config.getString(config_prefix + "table")}, sample_block{sample_block}, context(context), is_local{isLocal(host, port)}, - pool{/*is_local ? nullptr : */ext::make_unique( + pool{is_local ? 
nullptr : ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), "ClickhouseDictionarySource") }, load_all_query{composeLoadAllQuery(sample_block, table)} - {} + { + if (is_local) + throw Exception{ + "Cannot use local clickhouse as a dictionary source", + ErrorCodes::LOGICAL_ERROR + }; + } ClickhouseDictionarySource(const ClickhouseDictionarySource & other) : host{other.host}, port{other.port}, user{other.user}, password{other.password}, db{other.db}, table{other.db}, sample_block{other.sample_block}, context(other.context), is_local{other.is_local}, - pool{/*is_local ? nullptr : */ext::make_unique( + pool{is_local ? nullptr : ext::make_unique( max_connections, host, port, db, user, password, context.getDataTypeFactory(), "ClickhouseDictionarySource")}, load_all_query{other.load_all_query} From 7fb79939002c240b9b6346bebbeed394c20fa92d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 5 Feb 2015 03:33:58 +0300 Subject: [PATCH 32/43] dbms: fixed error with creating headers for dynamic compilation [#METR-2944]. --- copy_headers.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/copy_headers.sh b/copy_headers.sh index 463a2ed535f..abce4224904 100755 --- a/copy_headers.sh +++ b/copy_headers.sh @@ -20,8 +20,9 @@ DST=${2:-$SOURCE_PATH/../headers}; PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH" +# Опция -mcx16 для того, чтобы выбиралось больше заголовочных файлов (с запасом). 
-for i in $(clang -M -xc++ -std=gnu++1y -Wall -Werror -msse4 -mpopcnt -O3 -g -fPIC \ +for i in $(clang -M -xc++ -std=gnu++1y -Wall -Werror -msse4 -mcx16 -mpopcnt -O3 -g -fPIC \ $(cat "$SOURCE_PATH/CMakeLists.txt" | grep include_directories | grep -v METRICA_BINARY_DIR | sed -e "s!\${METRICA_SOURCE_DIR}!$SOURCE_PATH!; s!include_directories (!-I !; s!)!!;" | tr '\n' ' ') \ "$SOURCE_PATH/dbms/include/DB/Interpreters/SpecializedAggregator.h" | tr -d '\\' | From d0d7d9e4f7665210e9ff1d38429dfa9a241801ce Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 5 Feb 2015 16:44:33 +0300 Subject: [PATCH 33/43] dbms: return local server support to ClickhouseDictionarySource [#METR-13298] --- .../DB/Dictionaries/ClickhouseDictionarySource.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index 48c05e63c36..eb509191737 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -3,10 +3,10 @@ #include #include #include +#include #include #include #include -#include namespace DB { @@ -33,13 +33,7 @@ public: "ClickhouseDictionarySource") }, load_all_query{composeLoadAllQuery(sample_block, table)} - { - if (is_local) - throw Exception{ - "Cannot use local clickhouse as a dictionary source", - ErrorCodes::LOGICAL_ERROR - }; - } + {} ClickhouseDictionarySource(const ClickhouseDictionarySource & other) : host{other.host}, port{other.port}, user{other.user}, password{other.password}, @@ -55,9 +49,7 @@ public: BlockInputStreamPtr loadAll() override { if (is_local) - return new RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; - /// should be processed locally but due to some coupling problems cannot be handled properly now - /// return executeQuery(load_all_query, context).in; + return executeQuery(load_all_query, context).in; return new 
RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; } From 5a92644082f8ffe9883f844f3aa1d44643bd294f Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 5 Feb 2015 16:57:12 +0300 Subject: [PATCH 34/43] dbms: disable auto-reset on Poco::Event in Dictionaries [#METR-13298] --- dbms/include/DB/Interpreters/Dictionaries.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Interpreters/Dictionaries.h b/dbms/include/DB/Interpreters/Dictionaries.h index 54862706979..e943d12fe61 100644 --- a/dbms/include/DB/Interpreters/Dictionaries.h +++ b/dbms/include/DB/Interpreters/Dictionaries.h @@ -42,7 +42,7 @@ private: std::thread reloading_thread; std::thread reloading_externals_thread; - Poco::Event destroy; + Poco::Event destroy{false}; Logger * log; From b3c32ace5931d31db255ec8820c0a2d2c40023b6 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 5 Feb 2015 21:47:45 +0300 Subject: [PATCH 35/43] mysqlxx: copy constructor for pools [#METR-13298] --- libs/libmysqlxx/include/mysqlxx/Pool.h | 40 ++++++++++++++----- .../include/mysqlxx/PoolWithFailover.h | 14 ++++++- libs/libmysqlxx/src/PoolWithFailover.cpp | 35 ++++++++++++---- 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/libs/libmysqlxx/include/mysqlxx/Pool.h b/libs/libmysqlxx/include/mysqlxx/Pool.h index b3821215274..d2c13d31013 100644 --- a/libs/libmysqlxx/include/mysqlxx/Pool.h +++ b/libs/libmysqlxx/include/mysqlxx/Pool.h @@ -38,7 +38,7 @@ namespace mysqlxx * * TODO: Упростить, используя PoolBase. */ -class Pool +class Pool final { protected: /** Информация о соединении. 
*/ @@ -191,20 +191,27 @@ public: }; + Pool(const std::string & config_name, + unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, + unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, + const char * parent_config_name_ = nullptr) + : Pool{ + Poco::Util::Application::instance().config(), config_name, + default_connections_, max_connections_, parent_config_name_ + } + {} + /** * @param config_name Имя параметра в конфигурационном файле * @param default_connections_ Количество подключений по-умолчанию * @param max_connections_ Максимальное количество подключений */ - Pool(const std::string & config_name, + Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name, unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, const char * parent_config_name_ = nullptr) - : default_connections(default_connections_), max_connections(max_connections_), - initialized(false), was_successful(false) + : default_connections(default_connections_), max_connections(max_connections_) { - Poco::Util::LayeredConfiguration & cfg = Poco::Util::Application::instance().config(); - server = cfg.getString(config_name + ".host"); if (parent_config_name_) @@ -255,8 +262,19 @@ public: unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS) : default_connections(default_connections_), max_connections(max_connections_), - initialized(false), db(db_), server(server_), user(user_), password(password_), port(port_), - connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), was_successful(false) {} + db(db_), server(server_), user(user_), password(password_), port(port_), + connect_timeout(connect_timeout_), rw_timeout(rw_timeout_) {} + + Pool(const Pool & other) + : default_connections{other.default_connections}, + max_connections{other.max_connections}, + db{other.db}, 
server{other.server}, + user{other.user}, password{other.password}, + port{other.port}, connect_timeout{other.connect_timeout}, + rw_timeout{other.rw_timeout} + {} + + Pool & operator=(const Pool &) = delete; ~Pool() { @@ -340,7 +358,7 @@ protected: private: /** Признак того, что мы инициализированы. */ - bool initialized; + bool initialized{false}; /** Список соединений. */ typedef std::list Connections; /** Список соединений. */ @@ -360,10 +378,10 @@ private: unsigned rw_timeout; /** Хотя бы один раз было успешное соединение. */ - bool was_successful; + bool was_successful{false}; /** Выполняет инициализацию класса, если мы еще не инициализированы. */ - inline void initialize() + void initialize() { if (!initialized) { diff --git a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h index d74a079885b..3aa3daa11b8 100644 --- a/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h +++ b/libs/libmysqlxx/include/mysqlxx/PoolWithFailover.h @@ -61,7 +61,7 @@ namespace mysqlxx * * */ - class PoolWithFailover + class PoolWithFailover final { private: typedef Poco::SharedPtr PoolPtr; @@ -100,7 +100,17 @@ namespace mysqlxx PoolWithFailover(const std::string & config_name, unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + PoolWithFailover(const Poco::Util::AbstractConfiguration & config, + const std::string & config_name, + unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, + size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + + PoolWithFailover(const PoolWithFailover & other); + + PoolWithFailover & operator=(const PoolWithFailover &) = delete; 
/** Выделяет соединение для работы. */ Entry Get(); diff --git a/libs/libmysqlxx/src/PoolWithFailover.cpp b/libs/libmysqlxx/src/PoolWithFailover.cpp index 8afeb283b95..a329eed4115 100644 --- a/libs/libmysqlxx/src/PoolWithFailover.cpp +++ b/libs/libmysqlxx/src/PoolWithFailover.cpp @@ -2,13 +2,11 @@ using namespace mysqlxx; -PoolWithFailover::PoolWithFailover(const std::string & config_name, unsigned default_connections, - unsigned max_connections, size_t max_tries_) - : max_tries(max_tries_) +PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & cfg, + const std::string & config_name, const unsigned default_connections, + const unsigned max_connections, const size_t max_tries) + : max_tries(max_tries) { - Poco::Util::Application & app = Poco::Util::Application::instance(); - Poco::Util::AbstractConfiguration & cfg = app.config(); - if (cfg.has(config_name + ".replica")) { Poco::Util::AbstractConfiguration::Keys replica_keys; @@ -20,7 +18,7 @@ PoolWithFailover::PoolWithFailover(const std::string & config_name, unsigned def if (it->size() < std::string("replica").size() || it->substr(0, std::string("replica").size()) != "replica") throw Poco::Exception("Unknown element in config: " + *it + ", expected replica"); std::string replica_name = config_name + "." 
+ *it; - Replica replica(new Pool(replica_name, default_connections, max_connections, config_name.c_str()), + Replica replica(new Pool(cfg, replica_name, default_connections, max_connections, config_name.c_str()), cfg.getInt(replica_name + ".priority", 0)); replicas_by_priority[replica.priority].push_back(replica); } @@ -28,7 +26,28 @@ PoolWithFailover::PoolWithFailover(const std::string & config_name, unsigned def } else { - replicas_by_priority[0].push_back(Replica(new Pool(config_name, default_connections, max_connections), 0)); + replicas_by_priority[0].push_back(Replica(new Pool(cfg, config_name, default_connections, max_connections), 0)); + } +} + +PoolWithFailover::PoolWithFailover(const std::string & config_name, const unsigned default_connections, + const unsigned max_connections, const size_t max_tries) + : PoolWithFailover{ + Poco::Util::Application::instance().config(), config_name, + default_connections, max_connections, max_tries + } +{} + +PoolWithFailover::PoolWithFailover(const PoolWithFailover & other) + : max_tries{other.max_tries} +{ + for (const auto & replica_with_priority : other.replicas_by_priority) + { + Replicas replicas; + replicas.reserve(replica_with_priority.second.size()); + for (const auto & replica : replica_with_priority.second) + replicas.emplace_back(new Pool{*replica.pool}, replica.priority); + replicas_by_priority.emplace(replica_with_priority.first, std::move(replicas)); } } From c594ffa06656dc7ab1db6f943d3fb9afc077a633 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 5 Feb 2015 21:48:29 +0300 Subject: [PATCH 36/43] dbms: failover support for MysqlDictionarySource [#METR-13298]; return current time for tables with Update_time = null --- .../Dictionaries/ClickhouseDictionarySource.h | 3 ++- .../DB/Dictionaries/DictionarySourceFactory.h | 2 +- .../DB/Dictionaries/MysqlDictionarySource.h | 26 +++++-------------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git 
a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h index eb509191737..10846fdf26e 100644 --- a/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickhouseDictionarySource.h @@ -18,7 +18,8 @@ class ClickhouseDictionarySource final : public IDictionarySource static const auto max_block_size = 8192; public: - ClickhouseDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ClickhouseDictionarySource(const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, Block & sample_block, Context & context) : host{config.getString(config_prefix + "host")}, port(config.getInt(config_prefix + "port")), diff --git a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h index d18dd53e218..e46c8e8fe5c 100644 --- a/dbms/include/DB/Dictionaries/DictionarySourceFactory.h +++ b/dbms/include/DB/Dictionaries/DictionarySourceFactory.h @@ -56,7 +56,7 @@ public: } else if (config.has(config_prefix + "mysql")) { - return ext::make_unique(config, config_prefix + "mysql.", sample_block, context); + return ext::make_unique(config, config_prefix + "mysql", sample_block, context); } else if (config.has(config_prefix + "clickhouse")) { diff --git a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h index 32ec45feea9..e97382cd223 100644 --- a/dbms/include/DB/Dictionaries/MysqlDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MysqlDictionarySource.h @@ -15,25 +15,19 @@ class MysqlDictionarySource final : public IDictionarySource static const auto max_block_size = 8192; public: - MysqlDictionarySource(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + MysqlDictionarySource(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & 
sample_block, const Context & context) - : host{config.getString(config_prefix + "host")}, - port(config.getInt(config_prefix + "port")), - user{config.getString(config_prefix + "user", "")}, - password{config.getString(config_prefix + "password", "")}, - db{config.getString(config_prefix + "db", "")}, - table{config.getString(config_prefix + "table")}, + : table{config.getString(config_prefix + ".table")}, sample_block{sample_block}, context(context), - pool{db, host, user, password, port}, + pool{config, config_prefix}, load_all_query{composeLoadAllQuery(sample_block, table)}, last_modification{getLastModification()} {} MysqlDictionarySource(const MysqlDictionarySource & other) - : host{other.host}, port{other.port}, user{other.user}, password{other.password}, - db{other.db}, table{other.db}, + : table{other.table}, sample_block{other.sample_block}, context(other.context), - pool{db, host, user, password, port}, + pool{other.pool}, load_all_query{other.load_all_query}, last_modification{other.last_modification} {} @@ -65,7 +59,6 @@ public: private: mysqlxx::DateTime getLastModification() const { - const auto Create_time_idx = 11; const auto Update_time_idx = 12; try @@ -75,7 +68,7 @@ private: auto result = query.use(); auto row = result.fetch(); const auto & update_time = row[Update_time_idx]; - return !update_time.isNull() ? update_time.getDateTime() : row[Create_time_idx].getDateTime(); + return !update_time.isNull() ? update_time.getDateTime() : mysqlxx::DateTime{std::time(nullptr)}; } catch (...) 
{ @@ -104,15 +97,10 @@ private: return query; } - const std::string host; - const UInt16 port; - const std::string user; - const std::string password; - const std::string db; const std::string table; Block sample_block; const Context & context; - mutable mysqlxx::Pool pool; + mutable mysqlxx::PoolWithFailover pool; const std::string load_all_query; mysqlxx::DateTime last_modification; }; From fd01de7b03634fb03d55f84dfcbbbec66b5760eb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Feb 2015 07:43:42 +0300 Subject: [PATCH 37/43] Separated test case [#METR-10240]. --- dbms/src/IO/tests/io_and_exceptions.cpp | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 dbms/src/IO/tests/io_and_exceptions.cpp diff --git a/dbms/src/IO/tests/io_and_exceptions.cpp b/dbms/src/IO/tests/io_and_exceptions.cpp new file mode 100644 index 00000000000..77127cb278d --- /dev/null +++ b/dbms/src/IO/tests/io_and_exceptions.cpp @@ -0,0 +1,29 @@ +#include +#include +#include + + +int main() +{ + const char * s = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + DB::ReadBuffer file_in(const_cast(s), 32, 0); + DB::CompressedReadBuffer in(file_in); + + try + { + while (!in.eof()) + ; + } + catch (...) + { + std::cerr << "Catched!\n"; + } + + return 0; +} + + +void f() +{ + DB::parse("123"); +} From 96c16b742b9f0c2ebe22cc48c10b4cf2f08429d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Feb 2015 09:49:37 +0300 Subject: [PATCH 38/43] Separated test case for compiler bug [#METR-10240]. 
--- dbms/src/IO/tests/io_and_exceptions.cpp | 145 ++++++++++++++++++++++-- 1 file changed, 137 insertions(+), 8 deletions(-) diff --git a/dbms/src/IO/tests/io_and_exceptions.cpp b/dbms/src/IO/tests/io_and_exceptions.cpp index 77127cb278d..e832707e5a1 100644 --- a/dbms/src/IO/tests/io_and_exceptions.cpp +++ b/dbms/src/IO/tests/io_and_exceptions.cpp @@ -1,13 +1,143 @@ -#include -#include -#include +/** Воспроизводит баг в gcc 4.8.2 + * Баг: исключение не ловится. + * + * /usr/bin/c++ -std=c++11 -Wall -O3 ./io_and_exceptions.cpp && ./a.out + * + * Выводит: + * terminate called after throwing an instance of 'int' + * Aborted + * + * А должно ничего не выводить. + * + * В gcc 4.9 и clang 3.6 всё Ок. + */ + +typedef unsigned long size_t; + +class BufferBase +{ +public: + typedef char * Position; + + struct Buffer + { + Buffer(Position begin_pos_, Position end_pos_) : begin_pos(begin_pos_), end_pos(end_pos_) {} + + inline Position begin() const { return begin_pos; } + inline Position end() const { return end_pos; } + inline size_t size() const { return end_pos - begin_pos; } + inline void resize(size_t size) { end_pos = begin_pos + size; } + + private: + Position begin_pos; + Position end_pos; + }; + + BufferBase(Position ptr, size_t size, size_t offset) + : internal_buffer(ptr, ptr + size), working_buffer(ptr, ptr + size), pos(ptr + offset), bytes(0) {} + + void set(Position ptr, size_t size, size_t offset) + { + internal_buffer = Buffer(ptr, ptr + size); + working_buffer = Buffer(ptr, ptr + size); + pos = ptr + offset; + } + + inline Buffer & buffer() { return working_buffer; } + inline Position & position() { return pos; }; + inline size_t offset() const { return pos - working_buffer.begin(); } + + size_t count() const + { + return bytes + offset(); + } + +protected: + Buffer internal_buffer; + Buffer working_buffer; + + Position pos; + size_t bytes; +}; + + +class ReadBuffer : public BufferBase +{ +public: + ReadBuffer(Position ptr, size_t size) : BufferBase(ptr, 
size, 0) { working_buffer.resize(0); } + ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {} + + inline bool next() + { + bytes += offset(); + bool res = nextImpl(); + if (!res) + working_buffer.resize(0); + + pos = working_buffer.begin(); + return res; + } + + virtual ~ReadBuffer() {} + + inline bool eof() + { + return pos == working_buffer.end() && !next(); + } + +private: + virtual bool nextImpl() { return false; }; +}; + + +class CompressedReadBuffer : public ReadBuffer +{ +private: + bool nextImpl() + { + throw 1; + return true; + } + +public: + CompressedReadBuffer() : ReadBuffer(nullptr, 0) + { + } +}; + + +void readIntText(unsigned & x, ReadBuffer & buf) +{ + x = 0; + while (!buf.eof()) + { + switch (*buf.position()) + { + case '+': + break; + case '9': + x *= 10; + break; + default: + return; + } + } +} + + +unsigned parse(const char * data) +{ + unsigned res; + ReadBuffer buf(const_cast(data), 10, 0); + readIntText(res, buf); + return res; +} + int main() { - const char * s = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; - DB::ReadBuffer file_in(const_cast(s), 32, 0); - DB::CompressedReadBuffer in(file_in); + CompressedReadBuffer in; try { @@ -16,7 +146,6 @@ int main() } catch (...) { - std::cerr << "Catched!\n"; } return 0; @@ -25,5 +154,5 @@ int main() void f() { - DB::parse("123"); + parse("123"); } From 6521efcb2af1c0c5a7ab23f3e5fa6886ebd55088 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Feb 2015 07:25:09 +0300 Subject: [PATCH 39/43] dbms: fixed error [#METR-2944]. 
--- dbms/include/DB/Core/StringRef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Core/StringRef.h b/dbms/include/DB/Core/StringRef.h index e7ca944e33a..4ae264e4f3f 100644 --- a/dbms/include/DB/Core/StringRef.h +++ b/dbms/include/DB/Core/StringRef.h @@ -48,7 +48,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) if ( compareSSE2(p1, p2) && compareSSE2(p1 + 16, p2 + 16) && compareSSE2(p1 + 32, p2 + 32) - && compareSSE2(p1 + 40, p2 + 40)) + && compareSSE2(p1 + 48, p2 + 48)) { p1 += 64; p2 += 64; From bfbe878f99af918a33df1da70a3852c3360c7c81 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 6 Feb 2015 13:35:35 +0300 Subject: [PATCH 40/43] dbms: properly delay dictionary update time in case of exception [#METR-13298] --- dbms/src/Interpreters/Dictionaries.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index ee83289af47..bf8b38da787 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -125,6 +126,12 @@ void Dictionaries::reloadExternals() if (std::chrono::system_clock::now() < update_time) continue; + scope_exit({ + /// calculate next update time + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + }); + /// check source modified if (current->getSource()->isModified()) { @@ -132,10 +139,6 @@ void Dictionaries::reloadExternals() auto new_version = current->clone(); dictionary.second->set(new_version.release()); } - - /// calculate next update time - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; } } catch (...) 
From 19e3f7a561fceca209c9aa7e5c926878fb971f6c Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 9 Feb 2015 12:49:21 +0300 Subject: [PATCH 41/43] dbms: remove unused header --- dbms/src/Core/Block.cpp | 1 - dbms/src/Interpreters/InterpreterAlterQuery.cpp | 3 --- 2 files changed, 4 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index d96d8c974e2..336adfa9b78 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 10d50729f75..0b27a35a221 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -10,9 +10,6 @@ #include #include #include -#include -#include -#include #include From 8c62be82d1013a12f2f75d3c1632ad622088b365 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 9 Feb 2015 12:51:08 +0300 Subject: [PATCH 42/43] dbms: make StorageFactory a singleton --- dbms/include/DB/Interpreters/Context.h | 3 --- dbms/include/DB/Storages/StorageFactory.h | 3 ++- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Interpreters/Context.h b/dbms/include/DB/Interpreters/Context.h index c3f08b52fcd..3cdc20dde70 100644 --- a/dbms/include/DB/Interpreters/Context.h +++ b/dbms/include/DB/Interpreters/Context.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -86,7 +85,6 @@ struct ContextShared TableFunctionFactory table_function_factory; /// Табличные функции. AggregateFunctionFactory aggregate_function_factory; /// Агрегатные функции. DataTypeFactory data_type_factory; /// Типы данных. - StorageFactory storage_factory; /// Движки таблиц. FormatFactory format_factory; /// Форматы. mutable SharedPtr dictionaries; /// Словари Метрики. Инициализируются лениво. 
Users users; /// Известные пользователи. @@ -259,7 +257,6 @@ public: const TableFunctionFactory & getTableFunctionFactory() const { return shared->table_function_factory; } const AggregateFunctionFactory & getAggregateFunctionFactory() const { return shared->aggregate_function_factory; } const DataTypeFactory & getDataTypeFactory() const { return shared->data_type_factory; } - const StorageFactory & getStorageFactory() const { return shared->storage_factory; } const FormatFactory & getFormatFactory() const { return shared->format_factory; } const Dictionaries & getDictionaries() const; diff --git a/dbms/include/DB/Storages/StorageFactory.h b/dbms/include/DB/Storages/StorageFactory.h index cc6aa390379..832fa0ead11 100644 --- a/dbms/include/DB/Storages/StorageFactory.h +++ b/dbms/include/DB/Storages/StorageFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -11,7 +12,7 @@ class Context; /** Позволяет создать таблицу по имени движка. */ -class StorageFactory +class StorageFactory : public Singleton { public: StoragePtr get( diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 95bc1d2993b..a345ffe63bc 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -194,7 +195,7 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) else throw Exception("Incorrect CREATE query: required ENGINE.", ErrorCodes::ENGINE_REQUIRED); - res = context.getStorageFactory().get( + res = StorageFactory::instance().get( storage_name, data_path, table_name, database_name, context, context.getGlobalContext(), query_ptr, columns, materialized_columns, alias_columns, column_defaults, create.attach); From a485aacc54a2ef66aba19d423fa61f97198516e1 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 9 Feb 2015 13:10:25 +0300 Subject: [PATCH 43/43] dbms: fix 
external dictionaries exception on empty path [#METR-13298] --- dbms/src/Interpreters/Dictionaries.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Dictionaries.cpp b/dbms/src/Interpreters/Dictionaries.cpp index bf8b38da787..a4e713e68eb 100644 --- a/dbms/src/Interpreters/Dictionaries.cpp +++ b/dbms/src/Interpreters/Dictionaries.cpp @@ -34,7 +34,7 @@ void Dictionaries::reloadExternals() const auto config_path = getDictionariesConfigPath(Poco::Util::Application::instance().config()); const Poco::File config_file{config_path}; - if (!config_file.exists()) + if (config_path.empty() || !config_file.exists()) { LOG_WARNING(log, "config file '" + config_path + "' does not exist"); }