From dd2371e07110900e0f1222b5d76d96a8a10c344d Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 28 Nov 2018 14:37:12 +0300 Subject: [PATCH] CLICKHOUSE-4137 DictionaryFactory, DictionarySourceFactory (#3653) * Split ComplexKeyCacheDictionary to faster compile (part2) * Dictionaries as lib WIP * wip * clean * Fix build with old capnp * fix * wip * fixes * fix * clean * clean * clean * wip * wip * wip * flat * wip * cache * clean * wip * faster * fix style * fixes * clean * clean * Split CacheDictionary.cpp for faster compile * fix * fix * Less memory usage while compiling * missing file * format * Update registerDictionaries.h * clean --- cmake/generate_code.cmake | 5 + dbms/CMakeLists.txt | 8 +- dbms/programs/local/LocalServer.cpp | 2 + dbms/programs/server/Server.cpp | 2 + dbms/src/Dictionaries/CMakeLists.txt | 43 ++ dbms/src/Dictionaries/CacheDictionary.cpp | 494 ++---------------- dbms/src/Dictionaries/CacheDictionary.h | 6 +- dbms/src/Dictionaries/CacheDictionary.inc.h | 403 ++++++++++++++ .../CacheDictionary_generate1.cpp.in | 24 + .../CacheDictionary_generate2.cpp.in | 25 + .../CacheDictionary_generate3.cpp.in | 22 + .../ClickHouseDictionarySource.cpp | 24 +- .../Dictionaries/ClickHouseDictionarySource.h | 7 +- .../ComplexKeyCacheDictionary.cpp | 34 +- .../Dictionaries/ComplexKeyCacheDictionary.h | 6 +- ...acheDictionary_createAttributeWithType.cpp | 2 +- .../ComplexKeyCacheDictionary_generate1.cpp | 40 -- ...ComplexKeyCacheDictionary_generate1.cpp.in | 24 + .../ComplexKeyCacheDictionary_generate2.cpp | 41 -- ...ComplexKeyCacheDictionary_generate2.cpp.in | 27 + .../ComplexKeyCacheDictionary_generate3.cpp | 41 -- ...ComplexKeyCacheDictionary_generate3.cpp.in | 27 + ...exKeyCacheDictionary_setAttributeValue.cpp | 2 +- ...cheDictionary_setDefaultAttributeValue.cpp | 2 +- .../ComplexKeyHashedDictionary.cpp | 25 +- .../Dictionaries/ComplexKeyHashedDictionary.h | 7 +- .../Dictionaries/DictionaryBlockInputStream.h | 6 +- .../DictionaryBlockInputStreamBase.cpp | 2 +- dbms/src/Dictionaries/DictionaryFactory.cpp | 51 ++ dbms/src/Dictionaries/DictionaryFactory.h | 31 +- .../Dictionaries/DictionarySourceFactory.cpp | 196 ++----- .../Dictionaries/DictionarySourceFactory.h | 24 +- .../Dictionaries/DictionarySourceHelpers.cpp | 4 +- dbms/src/Dictionaries/DictionaryStructure.cpp | 2 +- dbms/src/Dictionaries/Embedded/CMakeLists.txt | 5 + .../Embedded/GeoDictionariesLoader.cpp | 8 +- .../Embedded/GeoDictionariesLoader.h | 2 +- .../Embedded/GeodataProviders/Entries.h | 2 +- .../GeodataProviders/HierarchiesProvider.cpp | 5 +- .../GeodataProviders/HierarchiesProvider.h | 3 +- .../HierarchyFormatReader.cpp | 2 +- .../GeodataProviders/HierarchyFormatReader.h | 3 +- .../GeodataProviders/IHierarchiesProvider.h | 3 +- .../GeodataProviders/INamesProvider.h | 3 +- .../GeodataProviders/NamesFormatReader.cpp | 2 +- .../GeodataProviders/NamesFormatReader.h | 3 +- .../GeodataProviders/NamesProvider.cpp | 4 +- .../Embedded/GeodataProviders/NamesProvider.h | 3 +- .../Embedded/IGeoDictionariesLoader.h | 17 +- .../Embedded/RegionsHierarchies.cpp | 3 +- .../Embedded/RegionsHierarchies.h | 6 +- .../Embedded/RegionsHierarchy.cpp | 6 +- .../Dictionaries/Embedded/RegionsHierarchy.h | 3 +- .../Dictionaries/Embedded/RegionsNames.cpp | 6 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 5 +- .../Embedded/TechDataHierarchy.cpp | 2 +- .../Dictionaries/Embedded/TechDataHierarchy.h | 14 +- .../ExecutableDictionarySource.cpp | 22 +- .../Dictionaries/ExecutableDictionarySource.h | 4 +- .../src/Dictionaries/ExternalQueryBuilder.cpp | 6 +- .../ExternalResultDescription.cpp | 2 +- .../src/Dictionaries/FileDictionarySource.cpp | 25 +- dbms/src/Dictionaries/FileDictionarySource.h | 3 +- dbms/src/Dictionaries/FlatDictionary.cpp | 36 +- dbms/src/Dictionaries/FlatDictionary.h | 6 +- .../src/Dictionaries/HTTPDictionarySource.cpp | 22 +- dbms/src/Dictionaries/HTTPDictionarySource.h | 4 +- dbms/src/Dictionaries/HashedDictionary.cpp | 31 +- dbms/src/Dictionaries/HashedDictionary.h | 6 +- dbms/src/Dictionaries/IDictionary.h | 2 +- .../Dictionaries/LibraryDictionarySource.cpp | 20 +- .../Dictionaries/LibraryDictionarySource.h | 7 +- .../LibraryDictionarySourceExternal.cpp | 2 +- .../Dictionaries/MongoDBBlockInputStream.cpp | 4 +- .../Dictionaries/MongoDBBlockInputStream.h | 2 +- .../Dictionaries/MongoDBDictionarySource.cpp | 45 +- .../Dictionaries/MongoDBDictionarySource.h | 13 +- .../Dictionaries/MySQLBlockInputStream.cpp | 2 +- dbms/src/Dictionaries/MySQLBlockInputStream.h | 2 +- .../Dictionaries/MySQLDictionarySource.cpp | 51 +- dbms/src/Dictionaries/MySQLDictionarySource.h | 11 +- .../src/Dictionaries/ODBCBlockInputStream.cpp | 5 +- dbms/src/Dictionaries/ODBCBlockInputStream.h | 4 +- .../RangeDictionaryBlockInputStream.h | 8 +- .../Dictionaries/RangeHashedDictionary.cpp | 31 +- dbms/src/Dictionaries/RangeHashedDictionary.h | 6 +- dbms/src/Dictionaries/TrieDictionary.cpp | 26 +- dbms/src/Dictionaries/TrieDictionary.h | 7 +- .../src/Dictionaries/XDBCDictionarySource.cpp | 54 +- dbms/src/Dictionaries/XDBCDictionarySource.h | 9 +- dbms/src/Dictionaries/readInvalidateQuery.cpp | 2 +- .../src/Dictionaries/registerDictionaries.cpp | 52 ++ dbms/src/Dictionaries/registerDictionaries.h | 6 + .../Dictionaries/writeParenthesisedString.cpp | 2 +- dbms/src/Functions/CMakeLists.txt | 13 +- dbms/src/IO/HTTPCommon.cpp | 2 + .../CatBoostModel.cpp | 3 +- .../CatBoostModel.h | 0 dbms/src/Interpreters/DictionaryFactory.cpp | 133 ----- dbms/src/Interpreters/ExternalModels.h | 2 +- .../Storages/System/StorageSystemModels.cpp | 2 +- 101 files changed, 1407 insertions(+), 1057 deletions(-) create mode 100644 cmake/generate_code.cmake create mode 100644 dbms/src/Dictionaries/CacheDictionary.inc.h create mode 100644 dbms/src/Dictionaries/CacheDictionary_generate1.cpp.in create mode 100644 dbms/src/Dictionaries/CacheDictionary_generate2.cpp.in create mode 100644 dbms/src/Dictionaries/CacheDictionary_generate3.cpp.in delete mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp create mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp.in delete mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp create mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp.in delete mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp create mode 100644 dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp.in create mode 100644 dbms/src/Dictionaries/DictionaryFactory.cpp create mode 100644 dbms/src/Dictionaries/Embedded/CMakeLists.txt create mode 100644 dbms/src/Dictionaries/registerDictionaries.cpp create mode 100644 dbms/src/Dictionaries/registerDictionaries.h rename dbms/src/{Dictionaries => Interpreters}/CatBoostModel.cpp (99%) rename dbms/src/{Dictionaries => Interpreters}/CatBoostModel.h (100%) delete mode 100644 dbms/src/Interpreters/DictionaryFactory.cpp diff --git a/cmake/generate_code.cmake b/cmake/generate_code.cmake new file mode 100644 index 00000000000..8eb9da24d1d --- /dev/null +++ b/cmake/generate_code.cmake @@ -0,0 +1,5 @@ +function(generate_code TEMPLATE_FILE) + foreach(NAME IN LISTS ARGN) + configure_file (${TEMPLATE_FILE}.cpp.in ${CMAKE_CURRENT_BINARY_DIR}/generated/${TEMPLATE_FILE}_${NAME}.cpp) + endforeach() +endfunction() diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index f0116d768e5..cce97e4a57e 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -63,9 +63,6 @@ add_headers_and_sources(dbms src/Core) add_headers_and_sources(dbms src/DataStreams) add_headers_and_sources(dbms src/DataTypes) add_headers_and_sources(dbms src/Databases) -add_headers_and_sources(dbms src/Dictionaries) -add_headers_and_sources(dbms src/Dictionaries/Embedded) -add_headers_and_sources(dbms src/Dictionaries/Embedded/GeodataProviders) add_headers_and_sources(dbms src/Interpreters) add_headers_and_sources(dbms src/Interpreters/ClusterProxy) add_headers_and_sources(dbms src/Columns) @@ -184,8 +181,11 @@ target_link_libraries (dbms clickhouse_common_config PUBLIC clickhouse_common_io - pocoext + PRIVATE + clickhouse_dictionaries + clickhouse_dictionaries_embedded PUBLIC + pocoext ${MYSQLXX_LIBRARY} PRIVATE ${BTRIE_LIBRARIES} diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 0dab224c7f1..40e26438afc 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ try registerAggregateFunctions(); registerTableFunctions(); registerStorages(); + registerDictionaries(); /// Maybe useless if (config().has("macros")) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 8d3bff9ebfb..32f88079ce3 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "HTTPHandlerFactory.h" #include "MetricsTransmitter.h" @@ -109,6 +110,7 @@ int Server::main(const std::vector & /*args*/) registerAggregateFunctions(); registerTableFunctions(); registerStorages(); + registerDictionaries(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index e69de29bb2d..08624bd6c6a 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -0,0 +1,43 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) +include(${ClickHouse_SOURCE_DIR}/cmake/generate_code.cmake) + +add_headers_and_sources(clickhouse_dictionaries .) + +generate_code(ComplexKeyCacheDictionary_generate1 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +generate_code(ComplexKeyCacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +generate_code(ComplexKeyCacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +generate_code(CacheDictionary_generate1 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) +add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) + +add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources}) +target_link_libraries(clickhouse_dictionaries PRIVATE clickhouse_common_io pocoext ${MYSQLXX_LIBRARY} ${BTRIE_LIBRARIES}) + +if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR}) +endif() + +if(USE_POCO_SQLODBC) + target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY}) + if (NOT USE_INTERNAL_POCO_LIBRARY) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} ${Poco_SQL_INCLUDE_DIR}) + endif() +endif() + +if(Poco_Data_FOUND) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) +endif() + +if(USE_POCO_DATAODBC) + target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_DataODBC_LIBRARY} ${Poco_Data_LIBRARY}) + if (NOT USE_INTERNAL_POCO_LIBRARY) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR}) + endif() +endif() + +if(USE_POCO_MONGODB) + target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_MongoDB_LIBRARY}) +endif() + +add_subdirectory(Embedded) diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 41950b443f1..08a0752a23f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -1,3 +1,5 @@ +#include "CacheDictionary.h" + #include #include #include @@ -11,12 +13,12 @@ #include #include #include -#include -#include +#include "DictionaryBlockInputStream.h" #include #include #include - +#include "DictionaryFactory.h" +#include "CacheDictionary.inc.h" namespace ProfileEvents { @@ -47,6 +49,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; + extern const int TOO_SMALL_BUFFER_SIZE; } @@ -206,34 +209,6 @@ void CacheDictionary::isInConstantVector( out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end(); } - -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - const auto null_value = std::get(attribute.null_values);\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return null_value; });\ -} -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE - void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const { auto & attribute = getAttribute(attribute_name); @@ -245,33 +220,6 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded getItemsString(attribute, ids, out, [&] (const size_t) { return null_value; }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const PaddedPODArray & def,\ - ResultArrayType & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t row) { return def[row]; });\ -} -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE - void CacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const @@ -283,32 +231,6 @@ void CacheDictionary::getString( getItemsString(attribute, ids, out, [&] (const size_t row) { return def->getDataAt(row); }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return def; });\ -} -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE - void CacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const @@ -487,374 +409,6 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib return attr; } - -template -void CacheDictionary::getItemsNumber( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const -{ - if (false) {} -#define DISPATCH(TYPE) \ - else if (attribute.type == AttributeUnderlyingType::TYPE) \ - getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Float32) - DISPATCH(Float64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) -#undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); -} - -template -void CacheDictionary::getItemsNumberImpl( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const -{ - /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } - std::unordered_map> outdated_ids; - auto & attribute_array = std::get>(attribute.arrays); - const auto rows = ext::size(ids); - - size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; - - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - /// fetch up-to-date values, decide which ones require update - for (const auto row : ext::range(0, rows)) - { - const auto id = ids[row]; - - /** cell should be updated if either: - * 1. ids do not match, - * 2. cell has expired, - * 3. explicit defaults were specified and cell was set default. */ - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - outdated_ids[id].push_back(row); - if (find_result.outdated) - ++cache_expired; - else - ++cache_not_found; - } - else - { - ++cache_hit; - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - out[row] = cell.isDefault() ? get_default(row) : static_cast(attribute_array[cell_idx]); - } - } - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); - - if (outdated_ids.empty()) - return; - - std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); - - /// request new values - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; - - for (const size_t row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&] (const auto id, const auto) - { - for (const size_t row : outdated_ids[id]) - out[row] = get_default(row); - }); -} - -template -void CacheDictionary::getItemsString( - Attribute & attribute, - const PaddedPODArray & ids, - ColumnString * out, - DefaultGetter && get_default) const -{ - const auto rows = ext::size(ids); - - /// save on some allocations - out->getOffsets().reserve(rows); - - auto & attribute_array = std::get>(attribute.arrays); - - auto found_outdated_values = false; - - /// perform optimistic version, fallback to pessimistic if failed - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - /// fetch up-to-date values, discard on fail - for (const auto row : ext::range(0, rows)) - { - const auto id = ids[row]; - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - found_outdated_values = true; - break; - } - else - { - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; - out->insertData(string_ref.data, string_ref.size); - } - } - } - - /// optimistic code completed successfully - if (!found_outdated_values) - { - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows, std::memory_order_release); - return; - } - - /// now onto the pessimistic one, discard possible partial results from the optimistic path - out->getChars().resize_assume_reserved(0); - out->getOffsets().resize_assume_reserved(0); - - /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } - std::unordered_map> outdated_ids; - /// we are going to store every string separately - std::unordered_map map; - - size_t total_length = 0; - size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - for (const auto row : ext::range(0, ids.size())) - { - const auto id = ids[row]; - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - outdated_ids[id].push_back(row); - if (find_result.outdated) - ++cache_expired; - else - ++cache_not_found; - } - else - { - ++cache_hit; - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; - - if (!cell.isDefault()) - map[id] = String{string_ref}; - - total_length += string_ref.size + 1; - } - } - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); - - /// request new values - if (!outdated_ids.empty()) - { - std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); - - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; - - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); - } - - out->getChars().reserve(total_length); - - for (const auto row : ext::range(0, ext::size(ids))) - { - const auto id = ids[row]; - const auto it = map.find(id); - - const auto string_ref = it != std::end(map) ? StringRef{it->second} : get_default(row); - out->insertData(string_ref.data, string_ref.size); - } -} - -template -void CacheDictionary::update( - const std::vector & requested_ids, - PresentIdHandler && on_cell_updated, - AbsentIdHandler && on_id_not_found) const -{ - std::unordered_map remaining_ids{requested_ids.size()}; - for (const auto id : requested_ids) - remaining_ids.insert({ id, 0 }); - - std::uniform_int_distribution distribution - { - dict_lifetime.min_sec, - dict_lifetime.max_sec - }; - - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; - - { - CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; - Stopwatch watch; - auto stream = source_ptr->loadIds(requested_ids); - stream->readPrefix(); - - const auto now = std::chrono::system_clock::now(); - - while (const auto block = stream->read()) - { - const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); - if (!id_column) - throw Exception{name + ": id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; - - const auto & ids = id_column->getData(); - - /// cache column pointers - const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) - { - return block.safeGetByPosition(i + 1).column.get(); - }); - - for (const auto i : ext::range(0, ids.size())) - { - const auto id = ids[i]; - - const auto find_result = findCellIdx(id, now); - const auto & cell_idx = find_result.cell_idx; - - auto & cell = cells[cell_idx]; - - for (const auto attribute_idx : ext::range(0, attributes.size())) - { - const auto & attribute_column = *column_ptrs[attribute_idx]; - auto & attribute = attributes[attribute_idx]; - - setAttributeValue(attribute, cell_idx, attribute_column[i]); - } - - /// if cell id is zero and zero does not map to this cell, then the cell is unused - if (cell.id == 0 && cell_idx != zero_cell_idx) - element_count.fetch_add(1, std::memory_order_relaxed); - - cell.id = id; - if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) - cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); - else - cell.setExpiresAt(std::chrono::time_point::max()); - - /// inform caller - on_cell_updated(id, cell_idx); - /// mark corresponding id as found - remaining_ids[id] = 1; - } - } - - stream->readSuffix(); - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); - } - - size_t not_found_num = 0, found_num = 0; - - const auto now = std::chrono::system_clock::now(); - /// Check which ids have not been found and require setting null_value - for (const auto & id_found_pair : remaining_ids) - { - if (id_found_pair.second) - { - ++found_num; - continue; - } - ++not_found_num; - - const auto id = id_found_pair.first; - - const auto find_result = findCellIdx(id, now); - const auto & cell_idx = find_result.cell_idx; - - auto & cell = cells[cell_idx]; - - /// Set null_value for each attribute - for (auto & attribute : attributes) - setDefaultAttributeValue(attribute, cell_idx); - - /// Check if cell had not been occupied before and increment element counter if it hadn't - if (cell.id == 0 && cell_idx != zero_cell_idx) - element_count.fetch_add(1, std::memory_order_relaxed); - - cell.id = id; - if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) - cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); - else - cell.setExpiresAt(std::chrono::time_point::max()); - - cell.setDefault(); - - /// inform caller that the cell has not been found - on_id_not_found(id, cell_idx); - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheRequests); -} - - void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const { switch (attribute.type) @@ -981,5 +535,41 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na return std::make_shared(shared_from_this(), max_block_size, getCachedIds(), column_names); } +void registerDictionaryCache(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + + if (dict_struct.key) + throw Exception {"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception {name + + ": elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const auto & layout_prefix = config_prefix + ".layout"; + const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); + if (size == 0) + throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + if (require_nonempty) + throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); + + + }; + factory.registerLayout("cache", create_layout); +} + } diff --git a/dbms/src/Dictionaries/CacheDictionary.h b/dbms/src/Dictionaries/CacheDictionary.h index 453e38246ec..8b72daaca23 100644 --- a/dbms/src/Dictionaries/CacheDictionary.h +++ b/dbms/src/Dictionaries/CacheDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/CacheDictionary.inc.h b/dbms/src/Dictionaries/CacheDictionary.inc.h new file mode 100644 index 00000000000..6fc082ab267 --- /dev/null +++ b/dbms/src/Dictionaries/CacheDictionary.inc.h @@ -0,0 +1,403 @@ +#include "CacheDictionary.h" + +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event DictCacheKeysRequested; + extern const Event DictCacheKeysRequestedMiss; + extern const Event DictCacheKeysRequestedFound; + extern const Event DictCacheKeysExpired; + extern const Event DictCacheKeysNotFound; + extern const Event DictCacheKeysHit; + extern const Event DictCacheRequestTimeNs; + extern const Event DictCacheRequests; + extern const Event DictCacheLockWriteNs; + extern const Event DictCacheLockReadNs; +} + +namespace CurrentMetrics +{ + extern const Metric DictCacheRequests; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +template +void CacheDictionary::getItemsNumber( + Attribute & attribute, + const PaddedPODArray & ids, + ResultArrayType & out, + DefaultGetter && get_default) const +{ + if (false) {} +#define DISPATCH(TYPE) \ + else if (attribute.type == AttributeUnderlyingType::TYPE) \ + getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Float32) + DISPATCH(Float64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) +#undef DISPATCH + else + throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); +} + +template +void CacheDictionary::getItemsNumberImpl( + Attribute & attribute, + const PaddedPODArray & ids, + ResultArrayType & out, + DefaultGetter && get_default) const +{ + /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } + std::unordered_map> outdated_ids; + auto & attribute_array = std::get>(attribute.arrays); + const auto rows = ext::size(ids); + + size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; + + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + /// fetch up-to-date values, decide which ones require update + for (const auto row : ext::range(0, rows)) + { + const auto id = ids[row]; + + /** cell should be updated if either: + * 1. ids do not match, + * 2. cell has expired, + * 3. explicit defaults were specified and cell was set default. */ + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + outdated_ids[id].push_back(row); + if (find_result.outdated) + ++cache_expired; + else + ++cache_not_found; + } + else + { + ++cache_hit; + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + out[row] = cell.isDefault() ? get_default(row) : static_cast(attribute_array[cell_idx]); + } + } + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); + ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); + ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); + + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); + + if (outdated_ids.empty()) + return; + + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), + [] (auto & pair) { return pair.first; }); + + /// request new values + update(required_ids, + [&] (const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; + + for (const size_t row : outdated_ids[id]) + out[row] = static_cast(attribute_value); + }, + [&] (const auto id, const auto) + { + for (const size_t row : outdated_ids[id]) + out[row] = get_default(row); + }); +} + +template +void CacheDictionary::getItemsString( + Attribute & attribute, + const PaddedPODArray & ids, + ColumnString * out, + DefaultGetter && get_default) const +{ + const auto rows = ext::size(ids); + + /// save on some allocations + out->getOffsets().reserve(rows); + + auto & attribute_array = std::get>(attribute.arrays); + + auto found_outdated_values = false; + + /// perform optimistic version, fallback to pessimistic if failed + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + /// fetch up-to-date values, discard on fail + for (const auto row : ext::range(0, rows)) + { + const auto id = ids[row]; + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + found_outdated_values = true; + break; + } + else + { + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; + out->insertData(string_ref.data, string_ref.size); + } + } + } + + /// optimistic code completed successfully + if (!found_outdated_values) + { + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows, std::memory_order_release); + return; + } + + /// now onto the pessimistic one, discard possible partial results from the optimistic path + out->getChars().resize_assume_reserved(0); + out->getOffsets().resize_assume_reserved(0); + + /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } + std::unordered_map> outdated_ids; + /// we are going to store every string separately + std::unordered_map map; + + size_t total_length = 0; + size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + for (const auto row : ext::range(0, ids.size())) + { + const auto id = ids[row]; + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + outdated_ids[id].push_back(row); + if (find_result.outdated) + ++cache_expired; + else + ++cache_not_found; + } + else + { + ++cache_hit; + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; + + if (!cell.isDefault()) + map[id] = String{string_ref}; + + total_length += string_ref.size + 1; + } + } + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); + ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); + ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); + + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); + + /// request new values + if (!outdated_ids.empty()) + { + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), + [] (auto & pair) { return pair.first; }); + + update(required_ids, + [&] (const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; + + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&] (const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); + } + + out->getChars().reserve(total_length); + + for (const auto row : ext::range(0, ext::size(ids))) + { + const auto id = ids[row]; + const auto it = map.find(id); + + const auto string_ref = it != std::end(map) ? StringRef{it->second} : get_default(row); + out->insertData(string_ref.data, string_ref.size); + } +} + +template +void CacheDictionary::update( + const std::vector & requested_ids, + PresentIdHandler && on_cell_updated, + AbsentIdHandler && on_id_not_found) const +{ + std::unordered_map remaining_ids{requested_ids.size()}; + for (const auto id : requested_ids) + remaining_ids.insert({ id, 0 }); + + std::uniform_int_distribution distribution + { + dict_lifetime.min_sec, + dict_lifetime.max_sec + }; + + const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + + { + CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; + Stopwatch watch; + auto stream = source_ptr->loadIds(requested_ids); + stream->readPrefix(); + + const auto now = std::chrono::system_clock::now(); + + while (const auto block = stream->read()) + { + const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); + if (!id_column) + throw Exception{name + ": id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; + + const auto & ids = id_column->getData(); + + /// cache column pointers + const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) + { + return block.safeGetByPosition(i + 1).column.get(); + }); + + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + + const auto find_result = findCellIdx(id, now); + const auto & cell_idx = find_result.cell_idx; + + auto & cell = cells[cell_idx]; + + for (const auto attribute_idx : ext::range(0, attributes.size())) + { + const auto & attribute_column = *column_ptrs[attribute_idx]; + auto & attribute = attributes[attribute_idx]; + + setAttributeValue(attribute, cell_idx, attribute_column[i]); + } + + /// if cell id is zero and zero does not map to this cell, then the cell is unused + if (cell.id == 0 && cell_idx != zero_cell_idx) + element_count.fetch_add(1, std::memory_order_relaxed); + + cell.id = id; + if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) + cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); + else + cell.setExpiresAt(std::chrono::time_point::max()); + + /// inform caller + on_cell_updated(id, cell_idx); + /// mark corresponding id as found + remaining_ids[id] = 1; + } + } + + stream->readSuffix(); + + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); + ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); + } + + size_t not_found_num = 0, found_num = 0; + + const auto now = std::chrono::system_clock::now(); + /// Check which ids have not been found and require setting null_value + for (const auto & id_found_pair : remaining_ids) + { + if (id_found_pair.second) + { + ++found_num; + continue; + } + ++not_found_num; + + const auto id = id_found_pair.first; + + const auto find_result = findCellIdx(id, now); + const auto & cell_idx = find_result.cell_idx; + + auto & cell = cells[cell_idx]; + + /// Set null_value for each attribute + for (auto & attribute : attributes) + setDefaultAttributeValue(attribute, cell_idx); + + /// Check if cell had not been occupied before and increment element counter if it hadn't + if (cell.id == 0 && cell_idx != zero_cell_idx) + element_count.fetch_add(1, std::memory_order_relaxed); + + cell.id = id; + if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) + cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); + else + cell.setExpiresAt(std::chrono::time_point::max()); + + cell.setDefault(); + + /// inform caller that the cell has not been found + on_id_not_found(id, cell_idx); + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); + ProfileEvents::increment(ProfileEvents::DictCacheRequests); +} + +} diff --git a/dbms/src/Dictionaries/CacheDictionary_generate1.cpp.in b/dbms/src/Dictionaries/CacheDictionary_generate1.cpp.in new file mode 100644 index 00000000000..53addbed3f6 --- /dev/null +++ b/dbms/src/Dictionaries/CacheDictionary_generate1.cpp.in @@ -0,0 +1,24 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; +void CacheDictionary::get@NAME@(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const +{ + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + const auto null_value = std::get(attribute.null_values); + + getItemsNumber(attribute, ids, out, [&](const size_t) { return null_value; }); +} + +} diff --git a/dbms/src/Dictionaries/CacheDictionary_generate2.cpp.in b/dbms/src/Dictionaries/CacheDictionary_generate2.cpp.in new file mode 100644 index 00000000000..5d87310030f --- /dev/null +++ b/dbms/src/Dictionaries/CacheDictionary_generate2.cpp.in @@ -0,0 +1,25 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; +void CacheDictionary::get@NAME@(const std::string & attribute_name, + const PaddedPODArray & ids, + const PaddedPODArray & def, + ResultArrayType & out) const +{ + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + getItemsNumber(attribute, ids, out, [&](const size_t row) { return def[row]; }); +} + +} diff --git a/dbms/src/Dictionaries/CacheDictionary_generate3.cpp.in b/dbms/src/Dictionaries/CacheDictionary_generate3.cpp.in new file mode 100644 index 00000000000..7931630d2e9 --- /dev/null +++ b/dbms/src/Dictionaries/CacheDictionary_generate3.cpp.in @@ -0,0 +1,22 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; +void CacheDictionary::get@NAME@(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const +{ + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + getItemsNumber(attribute, ids, out, [&](const size_t) { return def; }); +} + +} diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 4a52f4a9f4f..161a157ffaa 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -1,14 +1,17 @@ -#include -#include -#include +#include "ClickHouseDictionarySource.h" +#include "ExternalQueryBuilder.h" +#include "writeParenthesisedString.h" #include #include -#include +#include "readInvalidateQuery.h" #include #include #include #include #include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + namespace DB { @@ -175,4 +178,17 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re } } + +void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + Context & context) -> DictionarySourcePtr { + return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context); + }; + factory.registerSource("clickhouse", createTableSource); +} + } diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index d7559bda99b..89db23737bc 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -1,10 +1,9 @@ #pragma once -#include -#include -#include +#include "IDictionarySource.h" +#include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" #include -#include #include diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 86fbfbb474e..61693a3538a 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -1,5 +1,5 @@ -#include -#include +#include "ComplexKeyCacheDictionary.h" +#include "DictionaryBlockInputStream.h" #include #include #include @@ -9,6 +9,7 @@ #include #include #include +#include "DictionaryFactory.h" namespace ProfileEvents @@ -39,6 +40,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int BAD_ARGUMENTS; extern const int UNSUPPORTED_METHOD; + extern const int TOO_SMALL_BUFFER_SIZE; } @@ -378,4 +380,32 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & return std::make_shared(shared_from_this(), max_block_size, keys, column_names); } +void registerDictionaryComplexKeyCache(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + if (!dict_struct.key) + throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + const auto & layout_prefix = config_prefix + ".layout"; + const auto size = config.getInt(layout_prefix + ".complex_key_cache.size_in_cells"); + if (size == 0) + throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + if (require_nonempty) + throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); + }; + factory.registerLayout("complex_key_cache", create_layout); +} + + } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index a4a6ae4c16a..f60e142db5e 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -12,9 +12,9 @@ #include #include #include -#include -#include -#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" #include #include #include diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index 3bf10833a80..843c389dcb0 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -1,4 +1,4 @@ -#include +#include "ComplexKeyCacheDictionary.h" namespace DB { diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp deleted file mode 100644 index c22c14bc267..00000000000 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "ComplexKeyCacheDictionary.h" - -namespace DB -{ -namespace ErrorCodes -{ - extern const int TYPE_MISMATCH; -} - -#define DECLARE(TYPE) \ - void ComplexKeyCacheDictionary::get##TYPE( \ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ - { \ - dict_struct.validateKeyTypes(key_types); \ - \ - auto & attribute = getAttribute(attribute_name); \ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ - ErrorCodes::TYPE_MISMATCH}; \ - \ - const auto null_value = std::get(attribute.null_values); \ - \ - getItemsNumber(attribute, key_columns, out, [&](const size_t) { return null_value; }); \ - } -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE -} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp.in b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp.in new file mode 100644 index 00000000000..f24c278a554 --- /dev/null +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp.in @@ -0,0 +1,24 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; +void ComplexKeyCacheDictionary::get@NAME@(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const +{ + dict_struct.validateKeyTypes(key_types); + + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + const auto null_value = std::get(attribute.null_values); + + getItemsNumber(attribute, key_columns, out, [&](const size_t) { return null_value; }); +} +} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp deleted file mode 100644 index 8b7df84288d..00000000000 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "ComplexKeyCacheDictionary.h" - -namespace DB -{ -namespace ErrorCodes -{ - extern const int TYPE_MISMATCH; -} - -#define DECLARE(TYPE) \ - void ComplexKeyCacheDictionary::get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const PaddedPODArray & def, \ - ResultArrayType & out) const \ - { \ - dict_struct.validateKeyTypes(key_types); \ - \ - auto & attribute = getAttribute(attribute_name); \ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ - ErrorCodes::TYPE_MISMATCH}; \ - \ - getItemsNumber(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); \ - } -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE -} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp.in b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp.in new file mode 100644 index 00000000000..3ec01d96882 --- /dev/null +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp.in @@ -0,0 +1,27 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; + +void ComplexKeyCacheDictionary::get@NAME@(const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const PaddedPODArray & def, + ResultArrayType & out) const +{ + dict_struct.validateKeyTypes(key_types); + + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + getItemsNumber(attribute, key_columns, out, [&](const size_t row) { return def[row]; }); +} +} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp deleted file mode 100644 index ecc8794554b..00000000000 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "ComplexKeyCacheDictionary.h" - -namespace DB -{ -namespace ErrorCodes -{ - extern const int TYPE_MISMATCH; -} - -#define DECLARE(TYPE) \ - void ComplexKeyCacheDictionary::get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const TYPE def, \ - ResultArrayType & out) const \ - { \ - dict_struct.validateKeyTypes(key_types); \ - \ - auto & attribute = getAttribute(attribute_name); \ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ - ErrorCodes::TYPE_MISMATCH}; \ - \ - getItemsNumber(attribute, key_columns, out, [&](const size_t) { return def; }); \ - } -DECLARE(UInt8) -DECLARE(UInt16) -DECLARE(UInt32) -DECLARE(UInt64) -DECLARE(UInt128) -DECLARE(Int8) -DECLARE(Int16) -DECLARE(Int32) -DECLARE(Int64) -DECLARE(Float32) -DECLARE(Float64) -DECLARE(Decimal32) -DECLARE(Decimal64) -DECLARE(Decimal128) -#undef DECLARE -} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp.in b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp.in new file mode 100644 index 00000000000..287fa25c399 --- /dev/null +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp.in @@ -0,0 +1,27 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +using TYPE = @NAME@; + +void ComplexKeyCacheDictionary::get@NAME@(const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const TYPE def, + ResultArrayType & out) const +{ + dict_struct.validateKeyTypes(key_types); + + auto & attribute = getAttribute(attribute_name); + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::@NAME@)) + throw Exception {name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; + + getItemsNumber(attribute, key_columns, out, [&](const size_t) { return def; }); +} +} diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp index e85f96de420..9a3d34eb2c7 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp @@ -1,4 +1,4 @@ -#include +#include "ComplexKeyCacheDictionary.h" namespace DB { diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp index e3af300767d..7477e01da9c 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp @@ -1,4 +1,4 @@ -#include +#include "ComplexKeyCacheDictionary.h" namespace DB { diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 7e52b572c96..cdf01668bd2 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -1,8 +1,8 @@ #include #include -#include -#include - +#include "ComplexKeyHashedDictionary.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" namespace DB { @@ -661,5 +661,24 @@ BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names return std::make_shared(shared_from_this(), max_block_size, getKeys(), column_names); } +void registerDictionaryComplexKeyHashed(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + if (!dict_struct.key) + throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + }; + factory.registerLayout("complex_key_hashed", create_layout); +} + } diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index 7dd5d5baff9..859266fb5d1 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include @@ -243,5 +243,4 @@ private: BlockPtr saved_block; }; - } diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index 8f9a3a10d35..f1778a9fa6d 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -6,9 +6,9 @@ #include #include #include -#include -#include -#include +#include "DictionaryBlockInputStreamBase.h" +#include "DictionaryStructure.h" +#include "IDictionary.h" #include #include #include diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp b/dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp index 4cbb50f32c0..6ef0ca5beac 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp +++ b/dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp @@ -1,4 +1,4 @@ -#include +#include "DictionaryBlockInputStreamBase.h" namespace DB { diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp new file mode 100644 index 00000000000..e9279de23ec --- /dev/null +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -0,0 +1,51 @@ +#include "DictionaryFactory.h" + +#include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; +} + +void DictionaryFactory::registerLayout(const std::string & layout_type, Creator create_layout) +{ + //LOG_DEBUG(log, "Register dictionary layout type `" + layout_type + "`"); + if (!registered_layouts.emplace(layout_type, std::move(create_layout)).second) + throw Exception("DictionaryFactory: the layout name '" + layout_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); +} + + +DictionaryPtr DictionaryFactory::create( + const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context) const +{ + Poco::Util::AbstractConfiguration::Keys keys; + const auto & layout_prefix = config_prefix + ".layout"; + config.keys(layout_prefix, keys); + if (keys.size() != 1) + throw Exception {name + ": element dictionary.layout should have exactly one child element", + ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; + + const DictionaryStructure dict_struct {config, config_prefix + ".structure"}; + + auto source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context); + + const auto & layout_type = keys.front(); + + { + const auto found = registered_layouts.find(layout_type); + if (found != registered_layouts.end()) + { + const auto & create_layout = found->second; + return create_layout(name, dict_struct, config, config_prefix, std::move(source_ptr)); + } + } + + throw Exception {name + ": unknown dictionary layout type: " + layout_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; +} + +} diff --git a/dbms/src/Dictionaries/DictionaryFactory.h b/dbms/src/Dictionaries/DictionaryFactory.h index bd8f6d18af2..2c101425f41 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.h +++ b/dbms/src/Dictionaries/DictionaryFactory.h @@ -1,20 +1,41 @@ #pragma once -#include -#include #include +#include "IDictionary.h" +namespace Poco +{ +namespace Util +{ + class AbstractConfiguration; +} + +class Logger; +} namespace DB { - class Context; class DictionaryFactory : public ext::singleton { public: - DictionaryPtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, Context & context) const; + DictionaryPtr + create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context) + const; + + using Creator = std::function; + + void registerLayout(const std::string & layout_type, Creator create_layout); + +private: + using LayoutRegistry = std::unordered_map; + LayoutRegistry registered_layouts; }; } diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp index d2deb769839..8441c60cf87 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp @@ -1,41 +1,16 @@ -#include +#include "DictionarySourceFactory.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include - -#include -#if USE_POCO_MONGODB - #include -#endif -#if USE_POCO_SQLODBC || USE_POCO_DATAODBC - #include -#endif -#if USE_MYSQL - #include -#endif - +#include +#include +#include +#include #include - #include +#include "DictionaryStructure.h" namespace DB { - namespace ErrorCodes { extern const int UNKNOWN_ELEMENT_IN_CONFIG; @@ -46,149 +21,78 @@ namespace ErrorCodes namespace { - -Block createSampleBlock(const DictionaryStructure & dict_struct) -{ - Block block; - - if (dict_struct.id) - block.insert(ColumnWithTypeAndName{ColumnUInt64::create(1, 0), std::make_shared(), dict_struct.id->name}); - - if (dict_struct.key) + Block createSampleBlock(const DictionaryStructure & dict_struct) { - for (const auto & attribute : *dict_struct.key) + Block block; + + if (dict_struct.id) + block.insert(ColumnWithTypeAndName {ColumnUInt64::create(1, 0), std::make_shared(), dict_struct.id->name}); + + if (dict_struct.key) + { + for (const auto & attribute : *dict_struct.key) + { + auto column = attribute.type->createColumn(); + column->insertDefault(); + + block.insert(ColumnWithTypeAndName {std::move(column), attribute.type, attribute.name}); + } + } + + if (dict_struct.range_min) + { + for (const auto & attribute : {dict_struct.range_min, dict_struct.range_max}) + { + const auto & type = std::make_shared(attribute->type); + auto column = type->createColumn(); + column->insertDefault(); + + block.insert(ColumnWithTypeAndName {std::move(column), type, attribute->name}); + } + } + + for (const auto & attribute : dict_struct.attributes) { auto column = attribute.type->createColumn(); - column->insertDefault(); + column->insert(attribute.null_value); - block.insert(ColumnWithTypeAndName{std::move(column), attribute.type, attribute.name}); + block.insert(ColumnWithTypeAndName {std::move(column), attribute.type, attribute.name}); } + + return block; } - if (dict_struct.range_min) - { - for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max }) - { - const auto & type = std::make_shared(attribute->type); - auto column = type->createColumn(); - column->insertDefault(); - - block.insert(ColumnWithTypeAndName{std::move(column), type, attribute->name}); - } - } - - for (const auto & attribute : dict_struct.attributes) - { - auto column = attribute.type->createColumn(); - column->insert(attribute.null_value); - - block.insert(ColumnWithTypeAndName{std::move(column), attribute.type, attribute.name}); - } - - return block; -} - } -DictionarySourceFactory::DictionarySourceFactory() - : log(&Poco::Logger::get("DictionarySourceFactory")) +DictionarySourceFactory::DictionarySourceFactory() : log(&Poco::Logger::get("DictionarySourceFactory")) { -#if USE_POCO_SQLODBC || USE_POCO_DATAODBC - Poco::Data::ODBC::Connector::registerConnector(); -#endif } void DictionarySourceFactory::registerSource(const std::string & source_type, Creator create_source) { LOG_DEBUG(log, "Register dictionary source type `" + source_type + "`"); if (!registered_sources.emplace(source_type, std::move(create_source)).second) - throw Exception("DictionarySourceFactory: the source name '" + source_type + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception("DictionarySourceFactory: the source name '" + source_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); } DictionarySourcePtr DictionarySourceFactory::create( - const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const DictionaryStructure & dict_struct, Context & context) const + const std::string & name, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DictionaryStructure & dict_struct, + Context & context) const { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); if (keys.size() != 1) - throw Exception{name +": element dictionary.source should have exactly one child element", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; + throw Exception {name + ": element dictionary.source should have exactly one child element", + ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; auto sample_block = createSampleBlock(dict_struct); const auto & source_type = keys.front(); - if ("file" == source_type) - { - if (dict_struct.has_expressions) - throw Exception{"Dictionary source of type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; - - const auto filename = config.getString(config_prefix + ".file.path"); - const auto format = config.getString(config_prefix + ".file.format"); - return std::make_unique(filename, format, sample_block, context); - } - else if ("mysql" == source_type) - { -#if USE_MYSQL - return std::make_unique(dict_struct, config, config_prefix + ".mysql", sample_block); -#else - throw Exception{"Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - } - else if ("clickhouse" == source_type) - { - return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", - sample_block, context); - } - else if ("mongodb" == source_type) - { -#if USE_POCO_MONGODB - return std::make_unique(dict_struct, config, config_prefix + ".mongodb", sample_block); -#else - throw Exception{"Dictionary source of type `mongodb` is disabled because poco library was built without mongodb support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - } - else if ("odbc" == source_type) - { -#if USE_POCO_SQLODBC || USE_POCO_DATAODBC - BridgeHelperPtr bridge = std::make_shared>(context, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); - return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge); -#else - throw Exception{"Dictionary source of type `odbc` is disabled because poco library was built without ODBC support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - } - else if ("jdbc" == source_type) - { - throw Exception{"Dictionary source of type `jdbc` is disabled until consistent support for nullable fields.", - ErrorCodes::SUPPORT_IS_DISABLED}; -// BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string")); -// return std::make_unique(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge); - } - else if ("executable" == source_type) - { - if (dict_struct.has_expressions) - throw Exception{"Dictionary source of type `executable` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; - - return std::make_unique(dict_struct, config, config_prefix + ".executable", sample_block, context); - } - else if ("http" == source_type) - { - - if (dict_struct.has_expressions) - throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; - - return std::make_unique(dict_struct, config, config_prefix + ".http", sample_block, context); - } - else if ("library" == source_type) - { - return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block, context); - } - else { const auto found = registered_sources.find(source_type); if (found != registered_sources.end()) @@ -198,7 +102,7 @@ DictionarySourcePtr DictionarySourceFactory::create( } } - throw Exception{name + ": unknown dictionary source type: " + source_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; + throw Exception {name + ": unknown dictionary source type: " + source_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; } } diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.h b/dbms/src/Dictionaries/DictionarySourceFactory.h index 912a977a2de..1ac6e70f859 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.h +++ b/dbms/src/Dictionaries/DictionarySourceFactory.h @@ -1,23 +1,22 @@ #pragma once -#include -#include +#include "IDictionarySource.h" #include +#include namespace Poco { - namespace Util - { - class AbstractConfiguration; - } +namespace Util +{ + class AbstractConfiguration; +} - class Logger; +class Logger; } namespace DB { - class Context; struct DictionaryStructure; @@ -30,15 +29,18 @@ public: const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context)>; + Context & context)>; DictionarySourceFactory(); void registerSource(const std::string & source_type, Creator create_source); DictionarySourcePtr create( - const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const DictionaryStructure & dict_struct, Context & context) const; + const std::string & name, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DictionaryStructure & dict_struct, + Context & context) const; private: using SourceRegistry = std::unordered_map; diff --git a/dbms/src/Dictionaries/DictionarySourceHelpers.cpp b/dbms/src/Dictionaries/DictionarySourceHelpers.cpp index 73bc6486e7a..108f64cc0bf 100644 --- a/dbms/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/dbms/src/Dictionaries/DictionarySourceHelpers.cpp @@ -1,5 +1,5 @@ -#include -#include +#include "DictionarySourceHelpers.h" +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index 4573b90d7ad..c0255c9de7e 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -1,4 +1,4 @@ -#include +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/Embedded/CMakeLists.txt b/dbms/src/Dictionaries/Embedded/CMakeLists.txt new file mode 100644 index 00000000000..2af439c9677 --- /dev/null +++ b/dbms/src/Dictionaries/Embedded/CMakeLists.txt @@ -0,0 +1,5 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) +add_headers_and_sources(clickhouse_dictionaries_embedded .) +add_headers_and_sources(clickhouse_dictionaries_embedded GeodataProviders) +add_library(clickhouse_dictionaries_embedded ${LINK_MODE} ${clickhouse_dictionaries_embedded_sources}) +target_link_libraries(clickhouse_dictionaries_embedded PRIVATE clickhouse_common_io ${MYSQLXX_LIBRARY}) diff --git a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.cpp b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.cpp index 2d2967e72a1..0932038ea38 100644 --- a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.cpp +++ b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.cpp @@ -1,8 +1,8 @@ -#include - -#include -#include +#include "GeoDictionariesLoader.h" +#include +#include "GeodataProviders/HierarchiesProvider.h" +#include "GeodataProviders/NamesProvider.h" std::unique_ptr GeoDictionariesLoader::reloadRegionsHierarchies( const Poco::Util::AbstractConfiguration & config) diff --git a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h index 312637aea90..af7dff31ad3 100644 --- a/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h +++ b/dbms/src/Dictionaries/Embedded/GeoDictionariesLoader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "IGeoDictionariesLoader.h" // Default implementation of geo dictionaries loader used by native server application diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h index 8e68095fc65..4cfdbd0d75f 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "Types.h" #include struct RegionEntry diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp index 062c65a16a6..26653f1e905 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp @@ -1,8 +1,7 @@ -#include -#include +#include "HierarchiesProvider.h" +#include "HierarchyFormatReader.h" #include - #include #include #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h index a8f956b1bd3..7025a8e5560 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h @@ -1,9 +1,8 @@ #pragma once -#include +#include "IHierarchiesProvider.h" #include - #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp index 5716879ce92..ace4a1adaa0 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp @@ -1,4 +1,4 @@ -#include +#include "HierarchyFormatReader.h" #include #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h index 1d20c65f62a..e120785a9fa 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "IHierarchiesProvider.h" #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h index d12fc312dcf..a8cc915f33f 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "Entries.h" #include #include #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h index fb18684b3fa..bb529a9557b 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "Entries.h" #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp index df198ebde54..9d0c57f18eb 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.cpp @@ -1,4 +1,4 @@ -#include +#include "NamesFormatReader.h" #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h index d89ccfc3cbe..3f3063be25d 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "INamesProvider.h" #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp index ef44c0a4e03..c9042f33788 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.cpp @@ -1,6 +1,6 @@ -#include -#include +#include "NamesProvider.h" +#include "NamesFormatReader.h" #include diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h index 916dfe38230..937b679c65d 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "INamesProvider.h" #include diff --git a/dbms/src/Dictionaries/Embedded/IGeoDictionariesLoader.h b/dbms/src/Dictionaries/Embedded/IGeoDictionariesLoader.h index 548ccaf9258..d2637470f96 100644 --- a/dbms/src/Dictionaries/Embedded/IGeoDictionariesLoader.h +++ b/dbms/src/Dictionaries/Embedded/IGeoDictionariesLoader.h @@ -1,12 +1,19 @@ #pragma once -#include -#include - -#include - +#include "RegionsHierarchies.h" +#include "RegionsNames.h" #include +namespace Poco +{ + namespace Util + { + class AbstractConfiguration; + } + + class Logger; +} + // Provides actual versions of geo dictionaries (regions hierarchies, regions names) // Bind data structures (RegionsHierarchies, RegionsNames) with data providers diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp index d128ea53896..0a1a2928d65 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.cpp @@ -1,7 +1,6 @@ -#include +#include "RegionsHierarchies.h" #include - #include diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h index 8d05fa15d6e..d2b4ee5d48b 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchies.h @@ -1,10 +1,8 @@ #pragma once -#include -#include - +#include "RegionsHierarchy.h" +#include "GeodataProviders/IHierarchiesProvider.h" #include - #include diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp index 2a277e10fe2..3257eb355eb 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp @@ -1,12 +1,10 @@ -#include -#include +#include "RegionsHierarchy.h" +#include "GeodataProviders/IHierarchiesProvider.h" #include #include - #include #include - #include diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h index 838a99f71a1..3e81087629b 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "GeodataProviders/IHierarchiesProvider.h" #include #include #include diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp index 2c48f1f18c2..81614610f3a 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp @@ -1,11 +1,9 @@ -#include -#include +#include "RegionsNames.h" +#include "GeodataProviders/INamesProvider.h" #include #include - #include - #include namespace DB diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.h b/dbms/src/Dictionaries/Embedded/RegionsNames.h index 2cab42d9cd2..543e985fbd7 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.h +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.h @@ -1,12 +1,9 @@ #pragma once -#include - +#include "GeodataProviders/INamesProvider.h" #include - #include #include - #include #include diff --git a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp index 741d3833c34..841d602c22e 100644 --- a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp +++ b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.cpp @@ -1,7 +1,7 @@ #include #if USE_MYSQL -#include +#include "TechDataHierarchy.h" #include #include diff --git a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h index 060180bcc44..439ece8f6ca 100644 --- a/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h +++ b/dbms/src/Dictionaries/Embedded/TechDataHierarchy.h @@ -1,12 +1,18 @@ #pragma once -#include -#include - #include - #include +namespace Poco +{ + namespace Util + { + class AbstractConfiguration; + } + + class Logger; +} + /** @brief Class that lets you know if a search engine or operating system belongs * another search engine or operating system, respectively. diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index c17ce3e2461..0a04ec1a981 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,12 +1,15 @@ +#include "ExecutableDictionarySource.h" + #include #include -#include #include #include #include -#include +#include "DictionarySourceHelpers.h" #include #include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" namespace DB @@ -229,4 +232,19 @@ std::string ExecutableDictionarySource::toString() const return "Executable: " + command; } +void registerDictionarySourceExecutable(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & context) -> DictionarySourcePtr { + if (dict_struct.has_expressions) + throw Exception {"Dictionary source of type `executable` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; + + return std::make_unique(dict_struct, config, config_prefix + ".executable", sample_block, context); + }; + factory.registerSource("executable", createTableSource); +} + } diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.h b/dbms/src/Dictionaries/ExecutableDictionarySource.h index a6fe3373aae..00652ad28d9 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.h +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include "IDictionarySource.h" +#include "DictionaryStructure.h" namespace Poco { class Logger; } diff --git a/dbms/src/Dictionaries/ExternalQueryBuilder.cpp b/dbms/src/Dictionaries/ExternalQueryBuilder.cpp index bb420fdec8f..e8194344848 100644 --- a/dbms/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/dbms/src/Dictionaries/ExternalQueryBuilder.cpp @@ -3,9 +3,9 @@ #include #include #include -#include -#include -#include +#include "writeParenthesisedString.h" +#include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" namespace DB diff --git a/dbms/src/Dictionaries/ExternalResultDescription.cpp b/dbms/src/Dictionaries/ExternalResultDescription.cpp index a997cb4d07e..2bb903148e1 100644 --- a/dbms/src/Dictionaries/ExternalResultDescription.cpp +++ b/dbms/src/Dictionaries/ExternalResultDescription.cpp @@ -1,5 +1,5 @@ #include -#include +#include "ExternalResultDescription.h" #include #include #include diff --git a/dbms/src/Dictionaries/FileDictionarySource.cpp b/dbms/src/Dictionaries/FileDictionarySource.cpp index 6f37ae2840f..b135ac98c2c 100644 --- a/dbms/src/Dictionaries/FileDictionarySource.cpp +++ b/dbms/src/Dictionaries/FileDictionarySource.cpp @@ -1,9 +1,11 @@ +#include "FileDictionarySource.h" + #include #include -#include #include #include - +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" namespace DB { @@ -46,4 +48,23 @@ Poco::Timestamp FileDictionarySource::getLastModification() const return Poco::File{filename}.getLastModified(); } +void registerDictionarySourceFile(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & context) -> DictionarySourcePtr { + if (dict_struct.has_expressions) + throw Exception {"Dictionary source of type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; + + const auto filename = config.getString(config_prefix + ".file.path"); + const auto format = config.getString(config_prefix + ".file.format"); + + return std::make_unique(filename, format, sample_block, context); + }; + + factory.registerSource("file", createTableSource); +} + } diff --git a/dbms/src/Dictionaries/FileDictionarySource.h b/dbms/src/Dictionaries/FileDictionarySource.h index 5dd00223981..fd85eecfb53 100644 --- a/dbms/src/Dictionaries/FileDictionarySource.h +++ b/dbms/src/Dictionaries/FileDictionarySource.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "IDictionarySource.h" #include @@ -9,7 +9,6 @@ namespace DB class Context; - /// Allows loading dictionaries from a file with given format, does not support "random access" class FileDictionarySource final : public IDictionarySource { diff --git a/dbms/src/Dictionaries/FlatDictionary.cpp b/dbms/src/Dictionaries/FlatDictionary.cpp index 47a80ce8bc6..f4fff1008dc 100644 --- a/dbms/src/Dictionaries/FlatDictionary.cpp +++ b/dbms/src/Dictionaries/FlatDictionary.cpp @@ -1,7 +1,7 @@ -#include -#include +#include "FlatDictionary.h" +#include "DictionaryBlockInputStream.h" #include - +#include "DictionaryFactory.h" namespace DB { @@ -14,9 +14,9 @@ namespace ErrorCodes extern const int DICTIONARY_IS_EMPTY; extern const int LOGICAL_ERROR; extern const int UNKNOWN_TYPE; + extern const int UNSUPPORTED_METHOD; } - static const auto initial_array_size = 1024; static const auto max_array_size = 500000; @@ -634,5 +634,33 @@ BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_nam return std::make_shared(shared_from_this(), max_block_size, getIds() ,column_names); } +void registerDictionaryFlat(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + + if (dict_struct.key) + throw Exception {"'key' is not supported for dictionary of layout 'flat'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception {name + + ": elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + + + }; + factory.registerLayout("flat", create_layout); +} + + } diff --git a/dbms/src/Dictionaries/FlatDictionary.h b/dbms/src/Dictionaries/FlatDictionary.h index 272b248d23e..91fb10afa57 100644 --- a/dbms/src/Dictionaries/FlatDictionary.h +++ b/dbms/src/Dictionaries/FlatDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index 9099d7de72c..08a6411cc4c 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,4 +1,4 @@ -#include +#include "HTTPDictionarySource.h" #include #include @@ -6,9 +6,12 @@ #include #include #include -#include +#include "DictionarySourceHelpers.h" #include #include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + namespace DB { @@ -149,4 +152,19 @@ std::string HTTPDictionarySource::toString() const return uri.toString(); } +void registerDictionarySourceHTTP(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & context) -> DictionarySourcePtr { + if (dict_struct.has_expressions) + throw Exception {"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; + + return std::make_unique(dict_struct, config, config_prefix + ".http", sample_block, context); + }; + factory.registerSource("http", createTableSource); +} + } diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.h b/dbms/src/Dictionaries/HTTPDictionarySource.h index ac49cc59e16..531afcc4f6c 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.h +++ b/dbms/src/Dictionaries/HTTPDictionarySource.h @@ -1,8 +1,8 @@ #pragma once #include -#include -#include +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index 737bef6b01d..b74f043143d 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -1,6 +1,7 @@ #include -#include -#include +#include "HashedDictionary.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" namespace DB { @@ -11,6 +12,7 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; extern const int DICTIONARY_IS_EMPTY; + extern const int UNSUPPORTED_METHOD; } @@ -623,4 +625,29 @@ BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_n return std::make_shared(shared_from_this(), max_block_size, getIds(), column_names); } +void registerDictionaryHashed(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + if (dict_struct.key) + throw Exception {"'key' is not supported for dictionary of layout 'hashed'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception {name + + ": elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + + }; + factory.registerLayout("hashed", create_layout); +} + } diff --git a/dbms/src/Dictionaries/HashedDictionary.h b/dbms/src/Dictionaries/HashedDictionary.h index e54c01e3598..c5269e3f152 100644 --- a/dbms/src/Dictionaries/HashedDictionary.h +++ b/dbms/src/Dictionaries/HashedDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/IDictionary.h b/dbms/src/Dictionaries/IDictionary.h index c2c180979f9..eed6e0b4c71 100644 --- a/dbms/src/Dictionaries/IDictionary.h +++ b/dbms/src/Dictionaries/IDictionary.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include "IDictionarySource.h" namespace DB { diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index a0505ee79f1..f7cbc6e2081 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -1,12 +1,15 @@ #include -#include -#include +#include "LibraryDictionarySource.h" +#include "LibraryDictionarySourceExternal.h" #include #include #include #include #include #include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + namespace DB { @@ -269,4 +272,17 @@ std::string LibraryDictionarySource::toString() const { return path; } + +void registerDictionarySourceLibrary(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & context) -> DictionarySourcePtr { + return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block, context); + }; + factory.registerSource("library", createTableSource); +} + } diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.h b/dbms/src/Dictionaries/LibraryDictionarySource.h index f59a443784a..4a709d88f5e 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.h +++ b/dbms/src/Dictionaries/LibraryDictionarySource.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "DictionaryStructure.h" +#include "ExternalResultDescription.h" +#include "IDictionarySource.h" #include #include @@ -78,4 +78,5 @@ private: std::shared_ptr settings; void * lib_data = nullptr; }; + } diff --git a/dbms/src/Dictionaries/LibraryDictionarySourceExternal.cpp b/dbms/src/Dictionaries/LibraryDictionarySourceExternal.cpp index bcbcb4d0fbc..aa0884d548a 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySourceExternal.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySourceExternal.cpp @@ -1,4 +1,4 @@ -#include +#include "LibraryDictionarySourceExternal.h" #include namespace diff --git a/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp b/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp index 1f0f91a257f..36a3f0b6055 100644 --- a/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp +++ b/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp @@ -10,8 +10,8 @@ #include #include -#include -#include +#include "DictionaryStructure.h" +#include "MongoDBBlockInputStream.h" #include #include #include diff --git a/dbms/src/Dictionaries/MongoDBBlockInputStream.h b/dbms/src/Dictionaries/MongoDBBlockInputStream.h index 3c964708c91..bcf54107f62 100644 --- a/dbms/src/Dictionaries/MongoDBBlockInputStream.h +++ b/dbms/src/Dictionaries/MongoDBBlockInputStream.h @@ -2,7 +2,7 @@ #include #include -#include +#include "ExternalResultDescription.h" namespace Poco diff --git a/dbms/src/Dictionaries/MongoDBDictionarySource.cpp b/dbms/src/Dictionaries/MongoDBDictionarySource.cpp index e8fe62bd8b6..bbb7893e458 100644 --- a/dbms/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/dbms/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,21 +1,54 @@ -#include -#if USE_POCO_MONGODB -#include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" +#include "MongoDBDictionarySource.h" +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + +void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */) -> DictionarySourcePtr { +#if USE_POCO_MONGODB + return std::make_unique(dict_struct, config, config_prefix + ".mongodb", sample_block); +#else + (void)dict_struct; + (void)config; + (void)config_prefix; + (void)sample_block; + throw Exception {"Dictionary source of type `mongodb` is disabled because poco library was built without mongodb support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("mongodb", createTableSource); +} + +} + + +#if USE_POCO_MONGODB + +#include #include #include #include #include #include - #include // only after poco // naming conflict: // Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); // dbms/src/IO/WriteHelpers.h:146 #define writeCString(s, buf) -#include -#include +#include "MongoDBBlockInputStream.h" #include #include #include diff --git a/dbms/src/Dictionaries/MongoDBDictionarySource.h b/dbms/src/Dictionaries/MongoDBDictionarySource.h index 92428f818b2..f7c95487a1a 100644 --- a/dbms/src/Dictionaries/MongoDBDictionarySource.h +++ b/dbms/src/Dictionaries/MongoDBDictionarySource.h @@ -1,8 +1,10 @@ #pragma once -#include -#include +#include +#if USE_POCO_MONGODB +#include "IDictionarySource.h" +#include "DictionaryStructure.h" namespace Poco { @@ -79,3 +81,10 @@ private: }; } +#endif + +/*namespace DB +{ +class DictionarySourceFactory; +void registerDictionarySourceMongoDB(DictionarySourceFactory & factory); +}*/ diff --git a/dbms/src/Dictionaries/MySQLBlockInputStream.cpp b/dbms/src/Dictionaries/MySQLBlockInputStream.cpp index 41318e78b83..8016eedb56c 100644 --- a/dbms/src/Dictionaries/MySQLBlockInputStream.cpp +++ b/dbms/src/Dictionaries/MySQLBlockInputStream.cpp @@ -1,7 +1,7 @@ #include #if USE_MYSQL -#include +#include "MySQLBlockInputStream.h" #include #include #include diff --git a/dbms/src/Dictionaries/MySQLBlockInputStream.h b/dbms/src/Dictionaries/MySQLBlockInputStream.h index 7e082fdc21d..9e92f0c03a4 100644 --- a/dbms/src/Dictionaries/MySQLBlockInputStream.h +++ b/dbms/src/Dictionaries/MySQLBlockInputStream.h @@ -2,7 +2,7 @@ #include #include -#include +#include "ExternalResultDescription.h" #include #include #include diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.cpp b/dbms/src/Dictionaries/MySQLDictionarySource.cpp index 63639ceb64c..3fa8804845c 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.cpp +++ b/dbms/src/Dictionaries/MySQLDictionarySource.cpp @@ -1,18 +1,51 @@ -#include -#if USE_MYSQL +#include "MySQLDictionarySource.h" +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + +void registerDictionarySourceMysql(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */) -> DictionarySourcePtr { +#if USE_MYSQL + return std::make_unique(dict_struct, config, config_prefix + ".mysql", sample_block); +#else + (void)dict_struct; + (void)config; + (void)config_prefix; + (void)sample_block; + throw Exception {"Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("mysql", createTableSource); +} + +} + + +#if USE_MYSQL #include #include #include -#include - #include #include - -#include -#include -#include - +#include "MySQLBlockInputStream.h" +#include "readInvalidateQuery.h" #include diff --git a/dbms/src/Dictionaries/MySQLDictionarySource.h b/dbms/src/Dictionaries/MySQLDictionarySource.h index 2b9a63ac194..cda7f72b29c 100644 --- a/dbms/src/Dictionaries/MySQLDictionarySource.h +++ b/dbms/src/Dictionaries/MySQLDictionarySource.h @@ -1,8 +1,11 @@ #pragma once -#include -#include -#include +#include +#if USE_MYSQL + +#include "IDictionarySource.h" +#include "ExternalQueryBuilder.h" +#include "DictionaryStructure.h" #include #include @@ -81,3 +84,5 @@ private: }; } + +#endif diff --git a/dbms/src/Dictionaries/ODBCBlockInputStream.cpp b/dbms/src/Dictionaries/ODBCBlockInputStream.cpp index 85f727963d6..879ca2fa7c2 100644 --- a/dbms/src/Dictionaries/ODBCBlockInputStream.cpp +++ b/dbms/src/Dictionaries/ODBCBlockInputStream.cpp @@ -1,12 +1,9 @@ -#include - +#include "ODBCBlockInputStream.h" #include #include #include - #include #include - #include #include #include diff --git a/dbms/src/Dictionaries/ODBCBlockInputStream.h b/dbms/src/Dictionaries/ODBCBlockInputStream.h index b9f5543c271..a5f6ab872ce 100644 --- a/dbms/src/Dictionaries/ODBCBlockInputStream.h +++ b/dbms/src/Dictionaries/ODBCBlockInputStream.h @@ -2,12 +2,10 @@ #include #include -#include - +#include "ExternalResultDescription.h" #include #include #include - #include diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index a6c31cf7a9a..a08cc5ca578 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -5,10 +5,10 @@ #include #include #include -#include -#include -#include -#include +#include "DictionaryBlockInputStreamBase.h" +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "RangeHashedDictionary.h" #include namespace DB diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index 9aeea8eff7c..5db404b793d 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -1,10 +1,10 @@ -#include -#include +#include "RangeHashedDictionary.h" +#include "RangeDictionaryBlockInputStream.h" #include #include #include #include - +#include "DictionaryFactory.h" namespace { @@ -50,6 +50,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DICTIONARY_IS_EMPTY; extern const int TYPE_MISMATCH; + extern const int UNSUPPORTED_METHOD; } bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date) @@ -559,4 +560,28 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & col return callable.stream; } + +void registerDictionaryRangeHashed(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + if (dict_struct.key) + throw Exception {"'key' is not supported for dictionary of layout 'range_hashed'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (!dict_struct.range_min || !dict_struct.range_max) + throw Exception {name + ": dictionary of layout 'range_hashed' requires .structure.range_min and .structure.range_max", + ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + }; + factory.registerLayout("range_hashed", create_layout); +} + } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.h b/dbms/src/Dictionaries/RangeHashedDictionary.h index 5e8adc4ceae..2b4591b564d 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.h +++ b/dbms/src/Dictionaries/RangeHashedDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index 615a8df2ae0..047b5932818 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -3,16 +3,17 @@ #include #include #include -#include +#include "TrieDictionary.h" #include #include -#include +#include "DictionaryBlockInputStream.h" #include #include #include #include #include #include +#include "DictionaryFactory.h" namespace DB @@ -665,4 +666,25 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam std::move(getKeys), std::move(getView)); } + +void registerDictionaryTrie(DictionaryFactory & factory) +{ + auto create_layout = [=]( + const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr + ) -> DictionaryPtr { + if (!dict_struct.key) + throw Exception {"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + // This is specialised trie for storing IPv4 and IPv6 prefixes. + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + }; + factory.registerLayout("ip_trie", create_layout); +} + } diff --git a/dbms/src/Dictionaries/TrieDictionary.h b/dbms/src/Dictionaries/TrieDictionary.h index af4879b5a04..90a68983abc 100644 --- a/dbms/src/Dictionaries/TrieDictionary.h +++ b/dbms/src/Dictionaries/TrieDictionary.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include "IDictionary.h" +#include "IDictionarySource.h" +#include "DictionaryStructure.h" #include #include #include @@ -242,5 +242,4 @@ private: Logger * logger; }; - } diff --git a/dbms/src/Dictionaries/XDBCDictionarySource.cpp b/dbms/src/Dictionaries/XDBCDictionarySource.cpp index e7285f17025..16833420dfe 100644 --- a/dbms/src/Dictionaries/XDBCDictionarySource.cpp +++ b/dbms/src/Dictionaries/XDBCDictionarySource.cpp @@ -1,4 +1,5 @@ -#include +#include "XDBCDictionarySource.h" + #include #include #include @@ -7,17 +8,28 @@ #include #include #include -#include +#include "readInvalidateQuery.h" #include #include #include #include #include +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" +#include +#if USE_POCO_SQLODBC || USE_POCO_DATAODBC + #include +#endif namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + namespace { class XDBCBridgeBlockInputStream : public IProfilingBlockInputStream @@ -219,4 +231,42 @@ BlockInputStreamPtr XDBCDictionarySource::loadBase(const std::string & query) co timeouts, bridge_helper->getName() + "BlockInputStream"); } +void registerDictionarySourceXDBC(DictionarySourceFactory & factory) +{ +#if USE_POCO_SQLODBC || USE_POCO_DATAODBC + Poco::Data::ODBC::Connector::registerConnector(); +#endif + + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + Context & context) -> DictionarySourcePtr { +#if USE_POCO_SQLODBC || USE_POCO_DATAODBC + BridgeHelperPtr bridge = std::make_shared>(context, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); + return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge); +#else + throw Exception {"Dictionary source of type `odbc` is disabled because poco library was built without ODBC support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("odbc", createTableSource); +} + +void registerDictionarySourceJDBC(DictionarySourceFactory & factory) +{ + auto createTableSource = [=](const DictionaryStructure & /* dict_struct */, + const Poco::Util::AbstractConfiguration & /* config */, + const std::string & /* config_prefix */, + Block & /* sample_block */, + const Context & /* context */) -> DictionarySourcePtr { + throw Exception {"Dictionary source of type `jdbc` is disabled until consistent support for nullable fields.", + ErrorCodes::SUPPORT_IS_DISABLED}; + // BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string")); + // return std::make_unique(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge); + }; + factory.registerSource("jdbc", createTableSource); +} + + } diff --git a/dbms/src/Dictionaries/XDBCDictionarySource.h b/dbms/src/Dictionaries/XDBCDictionarySource.h index 352b7eecbd3..cee862e8fc3 100644 --- a/dbms/src/Dictionaries/XDBCDictionarySource.h +++ b/dbms/src/Dictionaries/XDBCDictionarySource.h @@ -2,11 +2,9 @@ #include #include - -#include -#include -#include - +#include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" +#include "IDictionarySource.h" #include #include @@ -83,4 +81,5 @@ private: ConnectionTimeouts timeouts; const Context & global_context; }; + } diff --git a/dbms/src/Dictionaries/readInvalidateQuery.cpp b/dbms/src/Dictionaries/readInvalidateQuery.cpp index 2095f696ea4..cacd7636b9f 100644 --- a/dbms/src/Dictionaries/readInvalidateQuery.cpp +++ b/dbms/src/Dictionaries/readInvalidateQuery.cpp @@ -1,4 +1,4 @@ -#include +#include "readInvalidateQuery.h" #include namespace DB diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp new file mode 100644 index 00000000000..1a8c5a7be7b --- /dev/null +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -0,0 +1,52 @@ +#include "DictionaryFactory.h" +#include "DictionarySourceFactory.h" + +namespace DB +{ +void registerDictionarySourceFile(DictionarySourceFactory & source_factory); +void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); +void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); +void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory); +void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory); +void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory); +void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory); +void registerDictionarySourceHTTP(DictionarySourceFactory & source_factory); +void registerDictionarySourceLibrary(DictionarySourceFactory & source_factory); + +void registerDictionaryRangeHashed(DictionaryFactory & factory); +void registerDictionaryComplexKeyHashed(DictionaryFactory & factory); +void registerDictionaryComplexKeyCache(DictionaryFactory & factory); +void registerDictionaryTrie(DictionaryFactory & factory); +void registerDictionaryFlat(DictionaryFactory & factory); +void registerDictionaryHashed(DictionaryFactory & factory); +void registerDictionaryCache(DictionaryFactory & factory); + + +void registerDictionaries() +{ + { + auto & source_factory = DictionarySourceFactory::instance(); + registerDictionarySourceFile(source_factory); + registerDictionarySourceMysql(source_factory); + registerDictionarySourceClickHouse(source_factory); + registerDictionarySourceMongoDB(source_factory); + registerDictionarySourceXDBC(source_factory); + registerDictionarySourceJDBC(source_factory); + registerDictionarySourceExecutable(source_factory); + registerDictionarySourceHTTP(source_factory); + registerDictionarySourceLibrary(source_factory); + } + + { + auto & factory = DictionaryFactory::instance(); + registerDictionaryRangeHashed(factory); + registerDictionaryComplexKeyHashed(factory); + registerDictionaryComplexKeyCache(factory); + registerDictionaryTrie(factory); + registerDictionaryFlat(factory); + registerDictionaryHashed(factory); + registerDictionaryCache(factory); + } +} + +} diff --git a/dbms/src/Dictionaries/registerDictionaries.h b/dbms/src/Dictionaries/registerDictionaries.h new file mode 100644 index 00000000000..e8480277c2c --- /dev/null +++ b/dbms/src/Dictionaries/registerDictionaries.h @@ -0,0 +1,6 @@ +#pragma once + +namespace DB +{ +void registerDictionaries(); +} diff --git a/dbms/src/Dictionaries/writeParenthesisedString.cpp b/dbms/src/Dictionaries/writeParenthesisedString.cpp index e2015d8e604..e162a7820f1 100644 --- a/dbms/src/Dictionaries/writeParenthesisedString.cpp +++ b/dbms/src/Dictionaries/writeParenthesisedString.cpp @@ -1,4 +1,4 @@ -#include +#include "writeParenthesisedString.h" namespace DB { diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index b9567bed775..1307c47260e 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -10,7 +10,18 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources}) -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE ${CONSISTENT_HASHING_LIBRARY} consistent-hashing-sumbur ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash ${BASE64_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) +target_link_libraries(clickhouse_functions + PUBLIC + dbms + PRIVATE + clickhouse_dictionaries + ${CONSISTENT_HASHING_LIBRARY} + consistent-hashing-sumbur + ${FARMHASH_LIBRARIES} + ${METROHASH_LIBRARIES} + murmurhash + ${BASE64_LIBRARY} + ${OPENSSL_CRYPTO_LIBRARY}) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) diff --git a/dbms/src/IO/HTTPCommon.cpp b/dbms/src/IO/HTTPCommon.cpp index c6b7e47225c..8e6d52738ea 100644 --- a/dbms/src/IO/HTTPCommon.cpp +++ b/dbms/src/IO/HTTPCommon.cpp @@ -84,6 +84,8 @@ namespace /// doesn't work properly without patch #if POCO_CLICKHOUSE_PATCH session->setKeepAlive(keep_alive); +#else + (void)keep_alive; // Avoid warning: unused parameter #endif return session; diff --git a/dbms/src/Dictionaries/CatBoostModel.cpp b/dbms/src/Interpreters/CatBoostModel.cpp similarity index 99% rename from dbms/src/Dictionaries/CatBoostModel.cpp rename to dbms/src/Interpreters/CatBoostModel.cpp index fd1ed454e0a..61fc1d19785 100644 --- a/dbms/src/Dictionaries/CatBoostModel.cpp +++ b/dbms/src/Interpreters/CatBoostModel.cpp @@ -1,4 +1,5 @@ -#include +#include "CatBoostModel.h" + #include #include #include diff --git a/dbms/src/Dictionaries/CatBoostModel.h b/dbms/src/Interpreters/CatBoostModel.h similarity index 100% rename from dbms/src/Dictionaries/CatBoostModel.h rename to dbms/src/Interpreters/CatBoostModel.h diff --git a/dbms/src/Interpreters/DictionaryFactory.cpp b/dbms/src/Interpreters/DictionaryFactory.cpp deleted file mode 100644 index 3e3fa774465..00000000000 --- a/dbms/src/Interpreters/DictionaryFactory.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int EXCESSIVE_ELEMENT_IN_CONFIG; - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int UNSUPPORTED_METHOD; - extern const int TOO_SMALL_BUFFER_SIZE; - extern const int BAD_ARGUMENTS; -} - - -DictionaryPtr DictionaryFactory::create(const std::string & name, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, Context & context) const -{ - Poco::Util::AbstractConfiguration::Keys keys; - const auto & layout_prefix = config_prefix + ".layout"; - config.keys(layout_prefix, keys); - if (keys.size() != 1) - throw Exception{name + ": element dictionary.layout should have exactly one child element", - ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; - - const DictionaryStructure dict_struct{config, config_prefix + ".structure"}; - - auto source_ptr = DictionarySourceFactory::instance().create( - name, config, config_prefix + ".source", dict_struct, context); - - const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - - const auto & layout_type = keys.front(); - - if ("range_hashed" == layout_type) - { - if (dict_struct.key) - throw Exception{"'key' is not supported for dictionary of layout 'range_hashed'", - ErrorCodes::UNSUPPORTED_METHOD}; - - if (!dict_struct.range_min || !dict_struct.range_max) - throw Exception{name + ": dictionary of layout 'range_hashed' requires .structure.range_min and .structure.range_max", - ErrorCodes::BAD_ARGUMENTS}; - - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); - } - else if ("complex_key_hashed" == layout_type) - { - if (!dict_struct.key) - throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", - ErrorCodes::BAD_ARGUMENTS}; - - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); - } - else if ("complex_key_cache" == layout_type) - { - if (!dict_struct.key) - throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", - ErrorCodes::BAD_ARGUMENTS}; - - const auto size = config.getInt(layout_prefix + ".complex_key_cache.size_in_cells"); - if (size == 0) - throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", - ErrorCodes::TOO_SMALL_BUFFER_SIZE}; - - if (require_nonempty) - throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; - - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); - } - else if ("ip_trie" == layout_type) - { - if (!dict_struct.key) - throw Exception{"'key' is required for dictionary of layout 'ip_trie'", - ErrorCodes::BAD_ARGUMENTS}; - - // This is specialised trie for storing IPv4 and IPv6 prefixes. - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); - } - else - { - if (dict_struct.key) - throw Exception{"'key' is not supported for dictionary of layout '" + layout_type + "'", - ErrorCodes::UNSUPPORTED_METHOD}; - - if (dict_struct.range_min || dict_struct.range_max) - throw Exception{name + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS}; - - if ("flat" == layout_type) - { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); - } - else if ("hashed" == layout_type) - { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); - } - else if ("cache" == layout_type) - { - const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); - if (size == 0) - throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", - ErrorCodes::TOO_SMALL_BUFFER_SIZE}; - - if (require_nonempty) - throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; - - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); - } - } - - throw Exception{name + ": unknown dictionary layout type: " + layout_type, - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; -} - - -} diff --git a/dbms/src/Interpreters/ExternalModels.h b/dbms/src/Interpreters/ExternalModels.h index cd0fe102bb1..9a048032bd7 100644 --- a/dbms/src/Interpreters/ExternalModels.h +++ b/dbms/src/Interpreters/ExternalModels.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/dbms/src/Storages/System/StorageSystemModels.cpp b/dbms/src/Storages/System/StorageSystemModels.cpp index 2479742c8ec..0b609eb8b7d 100644 --- a/dbms/src/Storages/System/StorageSystemModels.cpp +++ b/dbms/src/Storages/System/StorageSystemModels.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB {