From c2fa7ffcd3855a1e2079529edfce208276df0295 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 9 Oct 2019 01:41:09 +0300 Subject: [PATCH 001/122] Drop note about "estimation error for large sets will be large" Since error is not that large, especially for uniqCombined64(): rows error uniqCombined64()/rows - 1e7 0.00219 (1.0021969) - 1e8 0.00096 (0.99903523) - 2e8 0.50200 (1.005022735) - 1e9 0.99944 (0.999435738) - 1e10 0.01399 (0.9998568925) - 1e11 0.09800 (1.00097891658) - 1e12 0.29800 (0.997024870685) - 1e13 0.62200 (1.0062169910992) - 1e14 0.06200 (1.00062143604171) --- docs/en/query_language/agg_functions/reference.md | 2 +- docs/ru/query_language/agg_functions/reference.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 5e1bfaf5570..542d7f20aa4 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -558,7 +558,7 @@ Calculates the approximate number of different argument values. uniqCombined(HLL_precision)(x[, ...]) ``` -The `uniqCombined` function is a good choice for calculating the number of different values, but keep in mind that the estimation error for large sets (200 million elements and more) will be larger than the theoretical value due to the poor hash function choice. +The `uniqCombined` function is a good choice for calculating the number of different values. 
**Parameters** diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index 66d97c8b7ee..68e40c7bae0 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -568,7 +568,7 @@ uniq(x[, ...]) uniqCombined(HLL_precision)(x[, ...]) ``` -Функция `uniqCombined` — это хороший выбор для вычисления количества различных значений, однако стоит иметь в виду, что ошибка оценки для больших множеств (более 200 миллионов элементов) будет выше теоретического значения из-за плохого выбора хэш-функции. +Функция `uniqCombined` — это хороший выбор для вычисления количества различных значений. **Параметры** From 67fc3347c2ac9a2905ab572c9eaa2c9bf22004a1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Oct 2019 20:33:01 +0300 Subject: [PATCH 002/122] Preparing databases for dictionaries --- contrib/mariadb-connector-c | 2 +- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Databases/DatabaseDictionary.cpp | 8 +- dbms/src/Databases/DatabaseDictionary.h | 29 +- dbms/src/Databases/DatabaseLazy.cpp | 43 +-- dbms/src/Databases/DatabaseLazy.h | 43 ++- dbms/src/Databases/DatabaseMemory.cpp | 36 ++- dbms/src/Databases/DatabaseMemory.h | 16 +- dbms/src/Databases/DatabaseMySQL.cpp | 4 +- dbms/src/Databases/DatabaseMySQL.h | 4 +- dbms/src/Databases/DatabaseOnDisk.cpp | 278 ++++++++++++++++-- dbms/src/Databases/DatabaseOnDisk.h | 55 +++- dbms/src/Databases/DatabaseOrdinary.cpp | 193 +++++++----- dbms/src/Databases/DatabaseOrdinary.h | 26 +- dbms/src/Databases/DatabasesCommon.cpp | 137 ++++----- dbms/src/Databases/DatabasesCommon.h | 71 +---- dbms/src/Databases/IDatabase.h | 102 ++++++- dbms/src/Dictionaries/DictionaryFactory.cpp | 14 +- dbms/src/Dictionaries/DictionaryFactory.h | 11 +- .../Dictionaries/DictionarySourceFactory.cpp | 2 +- .../Dictionaries/DictionarySourceFactory.h | 4 +- dbms/src/Dictionaries/IDictionary.h | 4 +- dbms/src/Dictionaries/IDictionary_fwd.h | 15 + 
dbms/src/Interpreters/ActionLocksManager.cpp | 2 +- dbms/src/Interpreters/AsynchronousMetrics.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- .../src/Interpreters/InterpreterDropQuery.cpp | 2 +- .../Interpreters/InterpreterSystemQuery.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 8 +- dbms/src/Storages/StorageMerge.h | 2 +- 30 files changed, 803 insertions(+), 315 deletions(-) create mode 100644 dbms/src/Dictionaries/IDictionary_fwd.h diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index 18016300b00..c6503d3acc8 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit 18016300b00825a3fcbc6fb2aa37ac3e51416f71 +Subproject commit c6503d3acc85ca1a7f5e7e38b605d7c9410aac1e diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index cfa89af96d4..d931f4ada1c 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -459,6 +459,7 @@ namespace ErrorCodes extern const int DICTIONARY_ACCESS_DENIED = 482; extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; + extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 500; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index aecc1b9125f..4db63ca7b44 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -27,7 +27,7 @@ DatabaseDictionary::DatabaseDictionary(const String & name_) { } -void DatabaseDictionary::loadTables(Context &, bool) +void DatabaseDictionary::loadStoredObjects(Context &, bool) { } @@ -79,9 +79,9 @@ StoragePtr DatabaseDictionary::tryGetTable( return {}; } -DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context, const FilterByNameFunction & filter_by_name) +DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(const Context & context, 
const FilterByNameFunction & filter_by_name) { - return std::make_unique(listTables(context, filter_by_name)); + return std::make_unique(listTables(context, filter_by_name)); } bool DatabaseDictionary::empty(const Context & context) const @@ -115,7 +115,7 @@ void DatabaseDictionary::removeTable( throw Exception("DatabaseDictionary: removeTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); } -time_t DatabaseDictionary::getTableMetadataModificationTime( +time_t DatabaseDictionary::getObjectMetadataModificationTime( const Context &, const String &) { diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 1e1af7ef581..ee458ea64c4 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -31,7 +31,7 @@ public: return "Dictionary"; } - void loadTables( + void loadStoredObjects( Context & context, bool has_force_restore_data_flag) override; @@ -39,11 +39,18 @@ public: const Context & context, const String & table_name) const override; + bool isDictionaryExist(const Context & context, const String & table_name) const override; + + StoragePtr tryGetTable( const Context & context, const String & table_name) const override; - DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) const override; + + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + + DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; bool empty(const Context & context) const override; @@ -53,14 +60,20 @@ public: const StoragePtr & table, const ASTPtr & query) override; + void createDictionary( + const Context & context, const String & dictionary_name, const DictionaryPtr & 
dict_ptr, const ASTPtr & query) override; + void removeTable( const Context & context, const String & table_name) override; + void removeDictionary(const Context & context, const String & table_name) override; + void attachTable(const String & table_name, const StoragePtr & table) override; + StoragePtr detachTable(const String & table_name) override; - time_t getTableMetadataModificationTime( + time_t getObjectMetadataModificationTime( const Context & context, const String & table_name) override; @@ -74,6 +87,16 @@ public: ASTPtr getCreateDatabaseQuery(const Context & context) const override; + ASTPtr getCreateDictionaryQuery(const Context & context, const String & table_name) const override; + + ASTPtr tryGetCreateDictionaryQuery(const Context & context, const String & table_name) const override; + + + void attachDictionary(const String & dictionary_name, const DictionaryPtr & table) override; + + DictionaryPtr detachDictionary(const String & dictionary_name) override; + + void shutdown() override; private: diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 35a28e539bc..fcf023d369d 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -43,11 +43,11 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, } -void DatabaseLazy::loadTables( +void DatabaseLazy::loadStoredObjects( Context & /* context */, bool /* has_force_restore_data_flag */) { - DatabaseOnDisk::iterateTableFiles(*this, log, [this](const String & file_name) + DatabaseOnDisk::iterateMetadataFiles(*this, log, [this](const String & file_name) { const std::string table_name = file_name.substr(0, file_name.size() - 4); attachTable(table_name, nullptr); @@ -70,7 +70,7 @@ void DatabaseLazy::createTable( std::lock_guard lock(tables_mutex); auto it = tables_cache.find(table_name); if (it != tables_cache.end()) - it->second.metadata_modification_time = 
DatabaseOnDisk::getTableMetadataModificationTime(*this, table_name); + it->second.metadata_modification_time = DatabaseOnDisk::getObjectMetadataModificationTime(*this, table_name); } @@ -94,7 +94,7 @@ void DatabaseLazy::renameTable( } -time_t DatabaseLazy::getTableMetadataModificationTime( +time_t DatabaseLazy::getObjectMetadataModificationTime( const Context & /* context */, const String & table_name) { @@ -172,7 +172,7 @@ StoragePtr DatabaseLazy::tryGetTable( return loadTable(context, table_name); } -DatabaseIteratorPtr DatabaseLazy::getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(tables_mutex); Strings filtered_tables; @@ -198,7 +198,7 @@ void DatabaseLazy::attachTable(const String & table_name, const StoragePtr & tab auto [it, inserted] = tables_cache.emplace(std::piecewise_construct, std::forward_as_tuple(table_name), - std::forward_as_tuple(table, current_time, DatabaseOnDisk::getTableMetadataModificationTime(*this, table_name))); + std::forward_as_tuple(table, current_time, DatabaseOnDisk::getObjectMetadataModificationTime(*this, table_name))); if (!inserted) throw Exception("Table " + backQuote(getDatabaseName()) + "." 
+ backQuote(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); @@ -267,9 +267,9 @@ String DatabaseLazy::getDatabaseName() const return name; } -String DatabaseLazy::getTableMetadataPath(const String & table_name) const +String DatabaseLazy::getObjectMetadataPath(const String & table_name) const { - return DatabaseOnDisk::getTableMetadataPath(*this, table_name); + return DatabaseOnDisk::getObjectMetadataPath(*this, table_name); } StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table_name) const @@ -280,31 +280,18 @@ StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table const String table_metadata_path = getMetadataPath() + "/" + escapeForFileName(table_name) + ".sql"; - String s; - { - char in_buf[METADATA_FILE_BUFFER_SIZE]; - ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf); - readStringUntilEOF(s, in); - } - - /** Empty files with metadata are generated after a rough restart of the server. - * Remove these files to slightly reduce the work of the admins on startup. - */ - if (s.empty()) - { - LOG_ERROR(log, "LoadTable: File " << table_metadata_path << " is empty. 
Removing."); - Poco::File(table_metadata_path).remove(); - return nullptr; - } - try { String table_name_; StoragePtr table; Context context_copy(context); /// some tables can change context, but not LogTables - std::tie(table_name_, table) = createTableFromDefinition( - s, name, getDataPath(), context_copy, false, "in file " + table_metadata_path); - if (!endsWith(table->getName(), "Log")) + + auto ast = parseCreateQueryFromMetadataFile(table_metadata_path, log); + if (ast) + std::tie(table_name_, table) = createTableFromAST( + ast->as(), name, getDataPath(), context_copy, false); + + if (!ast || !endsWith(table->getName(), "Log")) throw Exception("Only *Log tables can be used with Lazy database engine.", ErrorCodes::LOGICAL_ERROR); { std::lock_guard lock(tables_mutex); diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index 649116508b3..a4328f6b46b 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -2,11 +2,13 @@ #include #include +#include namespace DB { + class DatabaseLazyIterator; /** Lazy engine of databases. 
@@ -20,7 +22,7 @@ public: String getEngineName() const override { return "Lazy"; } - void loadTables( + void loadStoredObjects( Context & context, bool has_force_restore_data_flag) override; @@ -30,10 +32,20 @@ public: const StoragePtr & table, const ASTPtr & query) override; + void createDictionary( + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dict_ptr, + const ASTPtr & query) override; + void removeTable( const Context & context, const String & table_name) override; + void removeDictionary( + const Context & context, + const String & table_name) override; + void renameTable( const Context & context, const String & table_name, @@ -49,7 +61,7 @@ public: const ConstraintsDescription & constraints, const ASTModifier & engine_modifier) override; - time_t getTableMetadataModificationTime( + time_t getObjectMetadataModificationTime( const Context & context, const String & table_name) override; @@ -61,12 +73,20 @@ public: const Context & context, const String & table_name) const override; + ASTPtr getCreateDictionaryQuery( + const Context & context, + const String & table_name) const override; + + ASTPtr tryGetCreateDictionaryQuery( + const Context & context, + const String & table_name) const override; + ASTPtr getCreateDatabaseQuery(const Context & context) const override; String getDataPath() const override; String getDatabaseName() const override; String getMetadataPath() const override; - String getTableMetadataPath(const String & table_name) const override; + String getObjectMetadataPath(const String & table_name) const override; void drop() override; @@ -74,18 +94,30 @@ public: const Context & context, const String & table_name) const override; + bool isDictionaryExist( + const Context & context, + const String & table_name) const override; + StoragePtr tryGetTable( const Context & context, const String & table_name) const override; + DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) 
const override; + bool empty(const Context & context) const override; - DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + + DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; void attachTable(const String & table_name, const StoragePtr & table) override; StoragePtr detachTable(const String & table_name) override; + void attachDictionary(const String & dictionary_name, const DictionaryPtr & table) override; + + DictionaryPtr detachDictionary(const String & dictionary_name) override; + void shutdown() override; ~DatabaseLazy() override; @@ -138,7 +170,7 @@ private: }; -class DatabaseLazyIterator final : public IDatabaseIterator +class DatabaseLazyIterator final : public IDatabaseTablesIterator { public: DatabaseLazyIterator(DatabaseLazy & database_, const Context & context_, Strings && table_names_); @@ -155,5 +187,4 @@ private: Strings::const_iterator iterator; mutable StoragePtr current_storage; }; - } diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 1356a28d245..a3cb5f65010 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -9,6 +9,8 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY; + extern const int NOT_IMPLEMENTED; } DatabaseMemory::DatabaseMemory(String name_) @@ -16,7 +18,7 @@ DatabaseMemory::DatabaseMemory(String name_) , log(&Logger::get("DatabaseMemory(" + name + ")")) {} -void DatabaseMemory::loadTables( +void DatabaseMemory::loadStoredObjects( Context & /*context*/, bool /*has_force_restore_data_flag*/) { @@ -32,6 +34,17 @@ void DatabaseMemory::createTable( 
attachTable(table_name, table); } + +void DatabaseMemory::createDictionary( + const Context & /*context*/, + const String & dictionary_name, + const DictionaryPtr & dictionary, + const ASTPtr & /*query*/) +{ + attachDictionary(dictionary_name, dictionary); +} + + void DatabaseMemory::removeTable( const Context & /*context*/, const String & table_name) @@ -39,9 +52,17 @@ void DatabaseMemory::removeTable( detachTable(table_name); } -time_t DatabaseMemory::getTableMetadataModificationTime( + +void DatabaseMemory::removeDictionary( const Context &, - const String &) + const String & dictionary_name) +{ + detachDictionary(dictionary_name); +} + + +time_t DatabaseMemory::getObjectMetadataModificationTime( + const Context &, const String &) { return static_cast(0); } @@ -53,6 +74,15 @@ ASTPtr DatabaseMemory::getCreateTableQuery( throw Exception("There is no CREATE TABLE query for DatabaseMemory tables", ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY); } + +ASTPtr DatabaseMemory::getCreateDictionaryQuery( + const Context &, + const String &) const +{ + throw Exception("There is no CREATE DICTIONARY query for DatabaseMemory dictionaries", ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY); +} + + ASTPtr DatabaseMemory::getCreateDatabaseQuery( const Context &) const { diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index 33bb8787168..6a52d02f949 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -23,7 +23,7 @@ public: String getEngineName() const override { return "Memory"; } - void loadTables( + void loadStoredObjects( Context & context, bool has_force_restore_data_flag) override; @@ -33,16 +33,26 @@ public: const StoragePtr & table, const ASTPtr & query) override; + void createDictionary( + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dictionary, + const ASTPtr & query) override; + void removeTable( const Context & context, const String & table_name) override; - 
time_t getTableMetadataModificationTime( + void removeDictionary( const Context & context, - const String & table_name) override; + const String & dictionary_name) override; + + time_t getObjectMetadataModificationTime(const Context & context, const String & table_name) override; ASTPtr getCreateTableQuery(const Context & context, const String & table_name) const override; + ASTPtr getCreateDictionaryQuery(const Context & context, const String & table_name) const override; ASTPtr tryGetCreateTableQuery(const Context &, const String &) const override { return nullptr; } + ASTPtr tryGetCreateDictionaryQuery(const Context &, const String &) const override { return nullptr; } ASTPtr getCreateDatabaseQuery(const Context & context) const override; diff --git a/dbms/src/Databases/DatabaseMySQL.cpp b/dbms/src/Databases/DatabaseMySQL.cpp index 9fd16cc94a8..491dfba1bcf 100644 --- a/dbms/src/Databases/DatabaseMySQL.cpp +++ b/dbms/src/Databases/DatabaseMySQL.cpp @@ -64,7 +64,7 @@ bool DatabaseMySQL::empty(const Context &) const return local_tables_cache.empty(); } -DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &, const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { Tables tables; std::lock_guard lock(mutex); @@ -75,7 +75,7 @@ DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &, const FilterByNa if (!filter_by_table_name || filter_by_table_name(local_table.first)) tables[local_table.first] = local_table.second.storage; - return std::make_unique(tables); + return std::make_unique(tables); } bool DatabaseMySQL::isTableExist(const Context & context, const String & name) const diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 7044a594b4c..89500c84c21 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -29,7 +29,7 @@ public: bool empty(const Context & 
context) const override; - DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; ASTPtr getCreateDatabaseQuery(const Context & context) const override; @@ -48,7 +48,7 @@ public: throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED); } - void loadTables(Context &, bool) override + void loadStoredObjects(Context &, bool) override { /// do nothing } diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 941935f37f4..5b55f9a402c 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -1,37 +1,47 @@ #include -#include -#include -#include #include #include #include #include +#include +#include #include -#include #include +#include +#include #include +#include +#include +#include #include #include + + namespace DB { +static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; + namespace ErrorCodes { extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY; extern const int FILE_DOESNT_EXIST; extern const int INCORRECT_FILE_NAME; extern const int SYNTAX_ERROR; extern const int TABLE_ALREADY_EXISTS; extern const int UNKNOWN_TABLE; + extern const int DICTIONARY_ALREADY_EXISTS; + extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } namespace detail { - String getTableMetadataPath(const String & base_path, const String & table_name) + String getObjectMetadataPath(const String & base_path, const String & table_name) { return base_path + (endsWith(base_path, "/") ? 
"" : "/") + escapeForFileName(table_name) + ".sql"; } @@ -85,6 +95,98 @@ namespace detail } } + +ASTPtr parseCreateQueryFromMetadataFile(const String & filepath, Poco::Logger * log) +{ + String definition; + { + char in_buf[METADATA_FILE_BUFFER_SIZE]; + ReadBufferFromFile in(filepath, METADATA_FILE_BUFFER_SIZE, -1, in_buf); + readStringUntilEOF(definition, in); + } + + /** Empty files with metadata are generated after a rough restart of the server. + * Remove these files to slightly reduce the work of the admins on startup. + */ + if (definition.empty()) + { + LOG_ERROR(log, "File " << filepath << " is empty. Removing."); + Poco::File(filepath).remove(); + return nullptr; + } + + ParserCreateQuery parser_table; + ASTPtr result = parseQuery(parser_table, definition, "in file " + filepath, 0); + return result; +} + + + +std::pair createTableFromAST( + ASTCreateQuery ast_create_query, + const String & database_name, + const String & database_data_path, + Context & context, + bool has_force_restore_data_flag) +{ + ast_create_query.attach = true; + ast_create_query.database = database_name; + + if (ast_create_query.as_table_function) + { + const auto & table_function = ast_create_query.as_table_function->as(); + const auto & factory = TableFunctionFactory::instance(); + StoragePtr storage = factory.get(table_function.name, context)->execute(ast_create_query.as_table_function, context, ast_create_query.table); + return {ast_create_query.table, storage}; + } + /// We do not directly use `InterpreterCreateQuery::execute`, because + /// - the database has not been created yet; + /// - the code is simpler, since the query is already brought to a suitable form. 
+ if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns) + throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + + ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context); + ConstraintsDescription constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints); + + return + { + ast_create_query.table, + StorageFactory::instance().get( + ast_create_query, + database_data_path, ast_create_query.table, database_name, context, context.getGlobalContext(), + columns, constraints, + true, has_force_restore_data_flag) + }; +} + + +String getObjectDefinitionFromCreateQuery(const ASTPtr & query) +{ + ASTPtr query_clone = query->clone(); + auto & create = query_clone->as(); + + /// We remove everything that is not needed for ATTACH from the query. + create.attach = true; + create.database.clear(); + create.as_database.clear(); + create.as_table.clear(); + create.if_not_exists = false; + create.is_populate = false; + create.replace_view = false; + + /// For views it is necessary to save the SELECT query itself, for the rest - on the contrary + if (!create.is_view && !create.is_materialized_view && !create.is_live_view) + create.select = nullptr; + + create.format = nullptr; + create.out_file = nullptr; + + std::ostringstream statement_stream; + formatAST(create, statement_stream, false); + statement_stream << '\n'; + return statement_stream.str(); +} + void DatabaseOnDisk::createTable( IDatabase & database, const Context & context, @@ -106,15 +208,19 @@ void DatabaseOnDisk::createTable( /// A race condition would be possible if a table with the same name is simultaneously created using CREATE and using ATTACH. /// But there is protection from it - see using DDLGuard in InterpreterCreateQuery. 
+ if (database.isDictionaryExist(context, table_name)) + throw Exception("Dictionary " + backQuote(database.getDatabaseName()) + "." + backQuote(table_name) + " already exists.", + ErrorCodes::DICTIONARY_ALREADY_EXISTS); + if (database.isTableExist(context, table_name)) throw Exception("Table " + backQuote(database.getDatabaseName()) + "." + backQuote(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); - String table_metadata_path = database.getTableMetadataPath(table_name); + String table_metadata_path = database.getObjectMetadataPath(table_name); String table_metadata_tmp_path = table_metadata_path + ".tmp"; String statement; { - statement = getTableDefinitionFromCreateQuery(query); + statement = getObjectDefinitionFromCreateQuery(query); /// Exclusive flags guarantees, that table is not created right now in another thread. Otherwise, exception will be thrown. WriteBufferFromFile out(table_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); @@ -141,6 +247,68 @@ void DatabaseOnDisk::createTable( } } + +void DatabaseOnDisk::createDictionary( + IDatabase & database, + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dictionary, + const ASTPtr & query) +{ + const auto & settings = context.getSettingsRef(); + + /// Create a file with metadata if necessary - if the query is not ATTACH. + /// Write the query of `ATTACH table` to it. + + /** The code is based on the assumption that all threads share the same order of operations + * - creating the .sql.tmp file; + * - adding a table to `tables`; + * - rename .sql.tmp to .sql. + */ + + /// A race condition would be possible if a table with the same name is simultaneously created using CREATE and using ATTACH. + /// But there is protection from it - see using DDLGuard in InterpreterCreateQuery. + + if (database.isDictionaryExist(context, dictionary_name)) + throw Exception("Dictionary " + backQuote(database.getDatabaseName()) + "." 
+ backQuote(dictionary_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); + + if (database.isTableExist(context, dictionary_name)) + throw Exception("Table " + backQuote(database.getDatabaseName()) + "." + backQuote(dictionary_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + + + String dictionary_metadata_path = database.getObjectMetadataPath(dictionary_name); + String dictionary_metadata_tmp_path = dictionary_metadata_path + ".tmp"; + String statement; + + { + statement = getObjectDefinitionFromCreateQuery(query); + + /// Exclusive flags guarantees, that table is not created right now in another thread. Otherwise, exception will be thrown. + WriteBufferFromFile out(dictionary_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(statement, out); + out.next(); + if (settings.fsync_metadata) + out.sync(); + out.close(); + } + + try + { + /// Add a table to the map of known tables. + database.attachDictionary(dictionary_name, dictionary); + + /// If it was ATTACH query and file with table metadata already exist + /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. + Poco::File(dictionary_metadata_tmp_path).renameTo(dictionary_metadata_path); + } + catch (...) 
+ { + Poco::File(dictionary_metadata_tmp_path).remove(); + throw; + } +} + + void DatabaseOnDisk::removeTable( IDatabase & database, const Context & /* context */, @@ -149,7 +317,7 @@ void DatabaseOnDisk::removeTable( { StoragePtr res = database.detachTable(table_name); - String table_metadata_path = database.getTableMetadataPath(table_name); + String table_metadata_path = database.getObjectMetadataPath(table_name); try { @@ -171,12 +339,47 @@ void DatabaseOnDisk::removeTable( } } -ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const IDatabase & database, const Context & context, - const String & table_name, bool throw_on_error) + +void DatabaseOnDisk::removeDictionary( + IDatabase & database, + const Context & /*context*/, + const String & dictionary_name, + Poco::Logger * log) +{ + DictionaryPtr res = database.detachDictionary(dictionary_name); + + String dictionary_metadata_path = database.getObjectMetadataPath(dictionary_name); + + try + { + Poco::File(dictionary_metadata_path).remove(); + } + catch (...) + { + try + { + Poco::File(dictionary_metadata_path + ".tmp_drop").remove(); + return; + } + catch (...) 
+ { + LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__)); + } + database.attachDictionary(dictionary_name, res); + throw; + } +} + + +ASTPtr DatabaseOnDisk::getCreateTableQueryImpl( + const IDatabase & database, + const Context & context, + const String & table_name, + bool throw_on_error) { ASTPtr ast; - auto table_metadata_path = detail::getTableMetadataPath(database.getMetadataPath(), table_name); + auto table_metadata_path = detail::getObjectMetadataPath(database.getMetadataPath(), table_name); ast = detail::getCreateQueryFromMetadata(table_metadata_path, database.getDatabaseName(), throw_on_error); if (!ast && throw_on_error) { @@ -193,6 +396,30 @@ ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const IDatabase & database, const return ast; } + +ASTPtr DatabaseOnDisk::getCreateDictionaryQueryImpl( + const IDatabase & database, + const Context & context, + const String & dictionary_name, + bool throw_on_error) +{ + ASTPtr ast; + + auto dictionary_metadata_path = detail::getObjectMetadataPath(database.getMetadataPath(), dictionary_name); + ast = detail::getCreateQueryFromMetadata(dictionary_metadata_path, database.getDatabaseName(), throw_on_error); + if (!ast && throw_on_error) + { + /// Handle system.* tables for which there are no table.sql files. + bool has_table = database.tryGetDictionary(context, dictionary_name) != nullptr; + + auto msg = has_table ? 
"There is no CREATE DICTIONARY query for table " : "There is no metadata file for dictionary "; + + throw Exception(msg + backQuote(dictionary_name), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY); + } + + return ast; +} + ASTPtr DatabaseOnDisk::getCreateTableQuery(const IDatabase & database, const Context & context, const String & table_name) { return getCreateTableQueryImpl(database, context, table_name, true); @@ -203,6 +430,17 @@ ASTPtr DatabaseOnDisk::tryGetCreateTableQuery(const IDatabase & database, const return getCreateTableQueryImpl(database, context, table_name, false); } + +ASTPtr DatabaseOnDisk::getCreateDictionaryQuery(const IDatabase & database, const Context & context, const String & dictionary_name) +{ + return getCreateDictionaryQueryImpl(database, context, dictionary_name, true); +} + +ASTPtr DatabaseOnDisk::tryGetCreateDictionaryQuery(const IDatabase & database, const Context & context, const String & dictionary_name) +{ + return getCreateDictionaryQueryImpl(database, context, dictionary_name, false); +} + ASTPtr DatabaseOnDisk::getCreateDatabaseQuery(const IDatabase & database, const Context & /*context*/) { ASTPtr ast; @@ -226,16 +464,16 @@ void DatabaseOnDisk::drop(const IDatabase & database) Poco::File(database.getMetadataPath()).remove(false); } -String DatabaseOnDisk::getTableMetadataPath(const IDatabase & database, const String & table_name) +String DatabaseOnDisk::getObjectMetadataPath(const IDatabase & database, const String & table_name) { - return detail::getTableMetadataPath(database.getMetadataPath(), table_name); + return detail::getObjectMetadataPath(database.getMetadataPath(), table_name); } -time_t DatabaseOnDisk::getTableMetadataModificationTime( +time_t DatabaseOnDisk::getObjectMetadataModificationTime( const IDatabase & database, const String & table_name) { - String table_metadata_path = getTableMetadataPath(database, table_name); + String table_metadata_path = getObjectMetadataPath(database, table_name); Poco::File 
meta_file(table_metadata_path); if (meta_file.exists()) @@ -248,7 +486,7 @@ time_t DatabaseOnDisk::getTableMetadataModificationTime( } } -void DatabaseOnDisk::iterateTableFiles(const IDatabase & database, Poco::Logger * log, const IteratingFunction & iterating_function) +void DatabaseOnDisk::iterateMetadataFiles(const IDatabase & database, Poco::Logger * log, const IteratingFunction & iterating_function) { Poco::DirectoryIterator dir_end; for (Poco::DirectoryIterator dir_it(database.getMetadataPath()); dir_it != dir_end; ++dir_it) @@ -265,11 +503,11 @@ void DatabaseOnDisk::iterateTableFiles(const IDatabase & database, Poco::Logger static const char * tmp_drop_ext = ".sql.tmp_drop"; if (endsWith(dir_it.name(), tmp_drop_ext)) { - const std::string table_name = dir_it.name().substr(0, dir_it.name().size() - strlen(tmp_drop_ext)); - if (Poco::File(database.getDataPath() + '/' + table_name).exists()) + const std::string object_name = dir_it.name().substr(0, dir_it.name().size() - strlen(tmp_drop_ext)); + if (Poco::File(database.getDataPath() + '/' + object_name).exists()) { - Poco::File(dir_it->path()).renameTo(table_name + ".sql"); - LOG_WARNING(log, "Table " << backQuote(table_name) << " was not dropped previously"); + Poco::File(dir_it->path()).renameTo(object_name + ".sql"); + LOG_WARNING(log, "Object " << backQuote(object_name) << " was not dropped previously"); } else { diff --git a/dbms/src/Databases/DatabaseOnDisk.h b/dbms/src/Databases/DatabaseOnDisk.h index 761d55bd90b..ad4bf4d2463 100644 --- a/dbms/src/Databases/DatabaseOnDisk.h +++ b/dbms/src/Databases/DatabaseOnDisk.h @@ -18,6 +18,22 @@ namespace detail ASTPtr getCreateQueryFromMetadata(const String & metadata_path, const String & database, bool throw_on_error); } +ASTPtr parseCreateQueryFromMetadataFile(const String & filepath, Poco::Logger * log); + +std::pair createTableFromAST( + ASTCreateQuery ast_create_query, + const String & database_name, + const String & database_data_path, + Context & context, + 
bool has_force_restore_data_flag); + +/** Get the row with the table definition based on the CREATE query. + * It is an ATTACH query that you can execute to create a table from the correspondent database. + * See the implementation. + */ +String getObjectDefinitionFromCreateQuery(const ASTPtr & query); + + /* Class to provide basic operations with tables when metadata is stored on disk in .sql files. */ class DatabaseOnDisk @@ -30,12 +46,25 @@ public: const StoragePtr & table, const ASTPtr & query); + static void createDictionary( + IDatabase & database, + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dictionary, + const ASTPtr & query); + static void removeTable( IDatabase & database, const Context & context, const String & table_name, Poco::Logger * log); + static void removeDictionary( + IDatabase & database, + const Context & context, + const String & dictionary_name, + Poco::Logger * log); + template static void renameTable( IDatabase & database, @@ -55,23 +84,33 @@ public: const Context & context, const String & table_name); + static ASTPtr getCreateDictionaryQuery( + const IDatabase & database, + const Context & context, + const String & dictionary_name); + + static ASTPtr tryGetCreateDictionaryQuery( + const IDatabase & database, + const Context & context, + const String & dictionary_name); + static ASTPtr getCreateDatabaseQuery( const IDatabase & database, const Context & context); static void drop(const IDatabase & database); - static String getTableMetadataPath( + static String getObjectMetadataPath( const IDatabase & database, - const String & table_name); + const String & object_name); - static time_t getTableMetadataModificationTime( + static time_t getObjectMetadataModificationTime( const IDatabase & database, - const String & table_name); + const String & object_name); using IteratingFunction = std::function; - static void iterateTableFiles(const IDatabase & database, Poco::Logger * log, const IteratingFunction & 
iterating_function); + static void iterateMetadataFiles(const IDatabase & database, Poco::Logger * log, const IteratingFunction & iterating_function); private: static ASTPtr getCreateTableQueryImpl( @@ -79,6 +118,12 @@ private: const Context & context, const String & table_name, bool throw_on_error); + + static ASTPtr getCreateDictionaryQueryImpl( + const IDatabase & database, + const Context & context, + const String & dictionary_name, + bool throw_on_error); }; diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 8dbeab29f41..2eac82bba82 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -13,8 +13,13 @@ #include #include #include +#include +#include +#include #include +#include #include +#include #include #include @@ -33,59 +38,82 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_CREATE_TABLE_FROM_METADATA; + extern const int CANNOT_CREATE_DICTIONARY_FROM_METADATA; + extern const int EMPTY_LIST_OF_COLUMNS_PASSED; + extern const int CANNOT_PARSE_TEXT; } -static constexpr size_t PRINT_MESSAGE_EACH_N_TABLES = 256; +static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256; static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5; static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; -static void loadTable( - Context & context, - const String & database_metadata_path, - DatabaseOrdinary & database, - const String & database_name, - const String & database_data_path, - const String & file_name, - bool has_force_restore_data_flag) +namespace { - Logger * log = &Logger::get("loadTable"); - const String table_metadata_path = database_metadata_path + "/" + file_name; - String s; +std::pair createDictionaryFromAST( + ASTCreateQuery ast_create_query, + const String & database_name, + const Context & context) +{ + ast_create_query.database = database_name; + + if (!ast_create_query.dictionary_attributes_list) + throw Exception("Missing definition of 
dictionary attributes.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + + return { - char in_buf[METADATA_FILE_BUFFER_SIZE]; - ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf); - readStringUntilEOF(s, in); - } + ast_create_query.table, + DictionaryFactory::instance().create(ast_create_query.table, ast_create_query, context) + }; +} - /** Empty files with metadata are generated after a rough restart of the server. - * Remove these files to slightly reduce the work of the admins on startup. - */ - if (s.empty()) +void loadObject( + Context & context, + const ASTCreateQuery & query, + DatabaseOrdinary & database, + const String database_data_path, + const String & database_name, + bool has_force_restore_data_flag) +try +{ + if (query.is_dictionary) { - LOG_ERROR(log, "File " << table_metadata_path << " is empty. Removing."); - Poco::File(table_metadata_path).remove(); - return; + String dictionary_name; + DictionaryPtr dictionary; + std::tie(dictionary_name, dictionary) = createDictionaryFromAST(query, database_name, context); + database.attachDictionary(dictionary_name, dictionary); } - - try + else { String table_name; StoragePtr table; - std::tie(table_name, table) = createTableFromDefinition( - s, database_name, database_data_path, context, has_force_restore_data_flag, "in file " + table_metadata_path); + std::tie(table_name, table) + = createTableFromAST(query, database_name, database_data_path, context, has_force_restore_data_flag); database.attachTable(table_name, table); } - catch (const Exception & e) +} +catch (const Exception & e) +{ + throw Exception( + "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ", error: " + e.displayText() + ", stack trace:\n" + + e.getStackTrace().toString(), + ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); +} + + +void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch) +{ + if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 
|| watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) { - throw Exception("Cannot create table from metadata file " + table_metadata_path + ", error: " + e.displayText() + - ", stack trace:\n" + e.getStackTrace().toString(), - ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); + LOG_INFO(log, std::fixed << std::setprecision(2) << processed * 100.0 / total << "%"); + watch.restart(); } } +} + DatabaseOrdinary::DatabaseOrdinary(String name_, const String & metadata_path_, const Context & context) : DatabaseWithOwnTablesBase(std::move(name_)) @@ -97,52 +125,66 @@ DatabaseOrdinary::DatabaseOrdinary(String name_, const String & metadata_path_, } -void DatabaseOrdinary::loadTables( +void DatabaseOrdinary::loadStoredObjects( Context & context, bool has_force_restore_data_flag) { - using FileNames = std::vector; + /** Tables load faster if they are loaded in sorted (by name) order. + * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, + * which does not correspond to order tables creation and does not correspond to order of their location on disk. 
+ */ + using FileNames = std::map; FileNames file_names; - DatabaseOnDisk::iterateTableFiles(*this, log, [&file_names](const String & file_name) + size_t total_dictionaries = 0; + DatabaseOnDisk::iterateMetadataFiles(*this, log, [&file_names, &total_dictionaries, this](const String & file_name) { - file_names.push_back(file_name); + String full_path = metadata_path + "/" + file_name; + try + { + auto ast = parseCreateQueryFromMetadataFile(full_path, log); + if (ast) + { + auto create_query = ast->as(); + file_names[file_name] = create_query; + total_dictionaries += create_query.is_dictionary; + } + } + catch (const Exception & e) + { + throw Exception( + "Cannot parse definition from metadata file " + full_path + ", error: " + e.displayText() + ", stack trace:\n" + + e.getStackTrace().toString(), ErrorCodes::CANNOT_PARSE_TEXT); + } + }); if (file_names.empty()) return; - /** Tables load faster if they are loaded in sorted (by name) order. - * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, - * which does not correspond to order tables creation and does not correspond to order of their location on disk. 
- */ - std::sort(file_names.begin(), file_names.end()); + size_t total_tables = file_names.size() - total_dictionaries; - const size_t total_tables = file_names.size(); - LOG_INFO(log, "Total " << total_tables << " tables."); + LOG_INFO(log, "Total " << total_tables << " tables and " << total_dictionaries << " dictionaries."); AtomicStopwatch watch; - std::atomic tables_processed {0}; + std::atomic tables_processed{0}; + std::atomic dictionaries_processed{0}; - auto loadOneTable = [&](const String & table) + auto loadOneObject = [&](const ASTCreateQuery & query) { - loadTable(context, getMetadataPath(), *this, getDatabaseName(), getDataPath(), table, has_force_restore_data_flag); + loadObject(context, query, *this, getDataPath(), getDatabaseName(), has_force_restore_data_flag); /// Messages, so that it's not boring to wait for the server to load for a long time. - if (++tables_processed % PRINT_MESSAGE_EACH_N_TABLES == 0 - || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) - { - LOG_INFO(log, std::fixed << std::setprecision(2) << tables_processed * 100.0 / total_tables << "%"); - watch.restart(); - } + if (query.is_dictionary) + logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch); + else + logAboutProgress(log, ++tables_processed, total_tables, watch); }; ThreadPool pool(SettingMaxThreads().getAutoValue()); - for (const auto & file_name : file_names) - { - pool.schedule([&]() { loadOneTable(file_name); }); - } + for (const auto & file_with_ast : file_names) + pool.schedule([&]() { loadOneObject(file_with_ast.second); }); pool.wait(); @@ -160,24 +202,16 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool) return; AtomicStopwatch watch; - std::atomic tables_processed {0}; + std::atomic tables_processed{0}; auto startupOneTable = [&](const StoragePtr & table) { table->startup(); - - if (++tables_processed % PRINT_MESSAGE_EACH_N_TABLES == 0 - || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) - { - LOG_INFO(log, 
std::fixed << std::setprecision(2) << tables_processed * 100.0 / total_tables << "%"); - watch.restart(); - } + logAboutProgress(log, ++tables_processed, total_tables, watch); }; for (const auto & table : tables) - { thread_pool.schedule([&]() { startupOneTable(table.second); }); - } thread_pool.wait(); } @@ -192,6 +226,14 @@ void DatabaseOrdinary::createTable( DatabaseOnDisk::createTable(*this, context, table_name, table, query); } +void DatabaseOrdinary::createDictionary( + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dictionary, + const ASTPtr & query) +{ + DatabaseOnDisk::createDictionary(*this, context, dictionary_name, dictionary, query); +} void DatabaseOrdinary::removeTable( const Context & context, @@ -211,11 +253,11 @@ void DatabaseOrdinary::renameTable( } -time_t DatabaseOrdinary::getTableMetadataModificationTime( +time_t DatabaseOrdinary::getObjectMetadataModificationTime( const Context & /* context */, const String & table_name) { - return DatabaseOnDisk::getTableMetadataModificationTime(*this, table_name); + return DatabaseOnDisk::getObjectMetadataModificationTime(*this, table_name); } ASTPtr DatabaseOrdinary::getCreateTableQuery(const Context & context, const String & table_name) const @@ -228,6 +270,17 @@ ASTPtr DatabaseOrdinary::tryGetCreateTableQuery(const Context & context, const S return DatabaseOnDisk::tryGetCreateTableQuery(*this, context, table_name); } + +ASTPtr DatabaseOrdinary::getCreateDictionaryQuery(const Context & context, const String & dictionary_name) const +{ + return DatabaseOnDisk::getCreateDictionaryQuery(*this, context, dictionary_name); +} + +ASTPtr DatabaseOrdinary::tryGetCreateDictionaryQuery(const Context & context, const String & dictionary_name) const +{ + return DatabaseOnDisk::tryGetCreateTableQuery(*this, context, dictionary_name); +} + ASTPtr DatabaseOrdinary::getCreateDatabaseQuery(const Context & context) const { return DatabaseOnDisk::getCreateDatabaseQuery(*this, context); @@ 
-278,7 +331,7 @@ void DatabaseOrdinary::alterTable( if (storage_modifier) storage_modifier(*ast_create_query.storage); - statement = getTableDefinitionFromCreateQuery(ast); + statement = getObjectDefinitionFromCreateQuery(ast); { WriteBufferFromFile out(table_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); @@ -323,9 +376,9 @@ String DatabaseOrdinary::getDatabaseName() const return name; } -String DatabaseOrdinary::getTableMetadataPath(const String & table_name) const +String DatabaseOrdinary::getObjectMetadataPath(const String & table_name) const { - return detail::getTableMetadataPath(getMetadataPath(), table_name); + return DatabaseOnDisk::getObjectMetadataPath(*this, table_name); } } diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index d8f7e1b3612..7a381489a1f 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -18,7 +18,7 @@ public: String getEngineName() const override { return "Ordinary"; } - void loadTables( + void loadStoredObjects( Context & context, bool has_force_restore_data_flag) override; @@ -28,10 +28,20 @@ public: const StoragePtr & table, const ASTPtr & query) override; + void createDictionary( + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dict_ptr, + const ASTPtr & query) override; + void removeTable( const Context & context, const String & table_name) override; + void removeDictionary( + const Context & context, + const String & table_name) override; + void renameTable( const Context & context, const String & table_name, @@ -47,7 +57,7 @@ public: const ConstraintsDescription & constraints, const ASTModifier & engine_modifier) override; - time_t getTableMetadataModificationTime( + time_t getObjectMetadataModificationTime( const Context & context, const String & table_name) override; @@ -59,12 +69,20 @@ public: const Context & context, const String & table_name) const override; + ASTPtr 
tryGetCreateDictionaryQuery( + const Context & context, + const String & name) const override; + + ASTPtr getCreateDictionaryQuery( + const Context & context, + const String & name) const override; + ASTPtr getCreateDatabaseQuery(const Context & context) const override; String getDataPath() const override; String getDatabaseName() const override; String getMetadataPath() const override; - String getTableMetadataPath(const String & table_name) const override; + String getObjectMetadataPath(const String & table_name) const override; void drop() override; @@ -74,8 +92,6 @@ private: Poco::Logger * log; void startupTables(ThreadPool & thread_pool); - - ASTPtr getCreateTableQueryImpl(const Context & context, const String & table_name, bool throw_on_error) const; }; } diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 1a278fbb03d..db625ac62af 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -4,12 +4,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -25,78 +27,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -String getTableDefinitionFromCreateQuery(const ASTPtr & query) -{ - ASTPtr query_clone = query->clone(); - auto & create = query_clone->as(); - - /// We remove everything that is not needed for ATTACH from the query. 
- create.attach = true; - create.database.clear(); - create.as_database.clear(); - create.as_table.clear(); - create.if_not_exists = false; - create.is_populate = false; - create.replace_view = false; - - /// For views it is necessary to save the SELECT query itself, for the rest - on the contrary - if (!create.is_view && !create.is_materialized_view && !create.is_live_view) - create.select = nullptr; - - create.format = nullptr; - create.out_file = nullptr; - - std::ostringstream statement_stream; - formatAST(create, statement_stream, false); - statement_stream << '\n'; - return statement_stream.str(); -} - - -std::pair createTableFromDefinition( - const String & definition, - const String & database_name, - const String & database_data_path, - Context & context, - bool has_force_restore_data_flag, - const String & description_for_error_message) -{ - ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, definition.data(), definition.data() + definition.size(), description_for_error_message, 0); - - auto & ast_create_query = ast->as(); - ast_create_query.attach = true; - ast_create_query.database = database_name; - - if (ast_create_query.as_table_function) - { - const auto & table_function = ast_create_query.as_table_function->as(); - const auto & factory = TableFunctionFactory::instance(); - StoragePtr storage = factory.get(table_function.name, context)->execute(ast_create_query.as_table_function, context, ast_create_query.table); - return {ast_create_query.table, storage}; - } - /// We do not directly use `InterpreterCreateQuery::execute`, because - /// - the database has not been created yet; - /// - the code is simpler, since the query is already brought to a suitable form. 
- if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns) - throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - - ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context); - ConstraintsDescription constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints); - - return - { - ast_create_query.table, - StorageFactory::instance().get( - ast_create_query, - database_data_path, ast_create_query.table, database_name, context, context.getGlobalContext(), - columns, constraints, - true, has_force_restore_data_flag) - }; -} - - bool DatabaseWithOwnTablesBase::isTableExist( const Context & /*context*/, const String & table_name) const @@ -105,6 +35,14 @@ bool DatabaseWithOwnTablesBase::isTableExist( return tables.find(table_name) != tables.end(); } +bool DatabaseWithOwnTablesBase::isDictionaryExist( + const Context & /*context*/, + const String & dictionary_name) const +{ + std::lock_guard lock(mutex); + return dictionaries.find(dictionary_name) != dictionaries.end(); +} + StoragePtr DatabaseWithOwnTablesBase::tryGetTable( const Context & /*context*/, const String & table_name) const @@ -116,22 +54,46 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable( return it->second; } -DatabaseIteratorPtr DatabaseWithOwnTablesBase::getIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_table_name) +DictionaryPtr DatabaseWithOwnTablesBase::tryGetDictionary(const Context & /*context*/, const String & dictionary_name) const +{ + std::lock_guard dict_lock{mutex}; + auto it = dictionaries.find(dictionary_name); + if (it == dictionaries.end()) + return {}; + + return it->second; +} + +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(mutex); if 
(!filter_by_table_name) - return std::make_unique(tables); + return std::make_unique(tables); Tables filtered_tables; for (const auto & [table_name, storage] : tables) if (filter_by_table_name(table_name)) filtered_tables.emplace(table_name, storage); - return std::make_unique(std::move(filtered_tables)); + return std::make_unique(std::move(filtered_tables)); +} + + +DatabaseDictionariesIteratorPtr DatabaseWithOwnTablesBase::getDictionariesIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_dictionary_name) +{ + std::lock_guard lock(mutex); + if (!filter_by_dictionary_name) + return std::make_unique(dictionaries); + + Dictionaries filtered_dictionaries; + for (const auto & [dictionary_name, dictionary] : dictionaries) + if (filter_by_dictionary_name(dictionary_name)) + filtered_dictionaries.emplace(dictionary_name, dictionary); + return std::make_unique(std::move(filtered_dictionaries)); } bool DatabaseWithOwnTablesBase::empty(const Context & /*context*/) const { std::lock_guard lock(mutex); - return tables.empty(); + return tables.empty() && dictionaries.empty(); } StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) @@ -149,6 +111,21 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) return res; } +DictionaryPtr DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name) +{ + DictionaryPtr res; + { + std::lock_guard lock(mutex); + auto it = dictionaries.find(dictionary_name); + if (it == dictionaries.end()) + throw Exception("Dictionary " + name + "." + dictionary_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); + res = it->second; + dictionaries.erase(it); + } + + return res; +} + void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const StoragePtr & table) { std::lock_guard lock(mutex); @@ -156,6 +133,14 @@ void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const Sto throw Exception("Table " + name + "." 
+ table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } + +void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, const DictionaryPtr & dictionary) +{ + std::lock_guard lock(mutex); + if (!dictionaries.emplace(dictionary_name, dictionary).second) + throw Exception("Dictionary " + name + "." + dictionary_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); +} + void DatabaseWithOwnTablesBase::shutdown() { /// You can not hold a lock during shutdown. diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index 9eba0fc0596..02ff2cd8483 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -15,63 +15,6 @@ namespace DB class Context; - -/** Get the row with the table definition based on the CREATE query. - * It is an ATTACH query that you can execute to create a table from the correspondent database. - * See the implementation. - */ -String getTableDefinitionFromCreateQuery(const ASTPtr & query); - - -/** Create a table by its definition, without using InterpreterCreateQuery. - * (InterpreterCreateQuery has more complex functionality, and it can not be used if the database has not been created yet) - * Returns the table name and the table itself. - * You must subsequently call IStorage::startup method to use the table. - */ -std::pair createTableFromDefinition( - const String & definition, - const String & database_name, - const String & database_data_path, - Context & context, - bool has_force_restore_data_flag, - const String & description_for_error_message); - - -/// Copies list of tables and iterates through such snapshot. 
-class DatabaseSnapshotIterator final : public IDatabaseIterator -{ -private: - Tables tables; - Tables::iterator it; - -public: - DatabaseSnapshotIterator(Tables & tables_) - : tables(tables_), it(tables.begin()) {} - - DatabaseSnapshotIterator(Tables && tables_) - : tables(tables_), it(tables.begin()) {} - - void next() override - { - ++it; - } - - bool isValid() const override - { - return it != tables.end(); - } - - const String & name() const override - { - return it->first; - } - - const StoragePtr & table() const override - { - return it->second; - } -}; - /// A base class for databases that manage their own list of tables. class DatabaseWithOwnTablesBase : public IDatabase { @@ -80,18 +23,27 @@ public: const Context & context, const String & table_name) const override; + bool isDictionaryExist(const Context & context, const String & dictionary_name) const override; + StoragePtr tryGetTable( const Context & context, const String & table_name) const override; - bool empty(const Context & context) const override; + DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) const override; + bool empty(const Context & context) const override; void attachTable(const String & table_name, const StoragePtr & table) override; + void attachDictionary(const String & name, const DictionaryPtr & dictionary) override; + StoragePtr detachTable(const String & table_name) override; - DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DictionaryPtr detachDictionary(const String & name) override; + + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + + DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; void shutdown() override; @@ -102,6 +54,7 @@ protected: mutable std::mutex mutex; Tables 
tables; + Dictionaries dictionaries; DatabaseWithOwnTablesBase(String name_) : name(std::move(name_)) { } }; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 8fce43c4b2d..edddc75fbe9 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -25,10 +26,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } - -/** Allows to iterate over tables. - */ -class IDatabaseIterator +class IDatabaseTablesIterator { public: virtual void next() = 0; @@ -37,15 +35,58 @@ public: virtual const String & name() const = 0; virtual const StoragePtr & table() const = 0; - virtual ~IDatabaseIterator() {} + virtual ~IDatabaseTablesIterator() = default; }; -using DatabaseIteratorPtr = std::unique_ptr; +/// Copies list of tables and iterates through such snapshot. +class DatabaseTablesSnapshotIterator : public IDatabaseTablesIterator +{ +private: + Tables tables; + Tables::iterator it; + +public: + DatabaseTablesSnapshotIterator(Tables & tables_) : tables(tables_), it(tables.begin()) {} + + DatabaseTablesSnapshotIterator(Tables && tables_) : tables(tables_), it(tables.begin()) {} + + void next() { ++it; } + + bool isValid() const { return it != tables.end(); } + + const String & name() const { return it->first; } + + const StoragePtr & table() const { return it->second; } +}; + +/// Copies list of dictionaries and iterates through such snapshot. 
+class DatabaseDictionariesSnapshotIterator +{ +private: + Dictionaries dictionaries; + Dictionaries::iterator it; + +public: + DatabaseDictionariesSnapshotIterator(Dictionaries & dictionaries_) : dictionaries(dictionaries_), it(dictionaries.begin()) {} + + DatabaseDictionariesSnapshotIterator(Dictionaries && dictionaries_) : dictionaries(dictionaries_), it(dictionaries.begin()) {} + + void next() { ++it; } + + bool isValid() const { return it != dictionaries.end(); } + + const String & name() const { return it->first; } + + DictionaryPtr & dictionary() const { return it->second; } +}; + +using DatabaseTablesIteratorPtr = std::unique_ptr; +using DatabaseDictionariesIteratorPtr = std::unique_ptr; /** Database engine. * It is responsible for: - * - initialization of set of known tables; + * - initialization of set of known tables and dictionaries; * - checking existence of a table and getting a table object; * - retrieving a list of all tables; * - creating and dropping tables; @@ -60,7 +101,7 @@ public: /// Load a set of existing tables. /// You can call only once, right after the object is created. - virtual void loadTables( + virtual void loadStoredObjects( Context & context, bool has_force_restore_data_flag) = 0; @@ -69,16 +110,29 @@ public: const Context & context, const String & name) const = 0; + /// Check the existence of the dictionary + virtual bool isDictionaryExist( + const Context & context, + const String & name) const = 0; + /// Get the table for work. Return nullptr if there is no table. virtual StoragePtr tryGetTable( const Context & context, const String & name) const = 0; + /// Get the dictionary for work. Return nullptr if there is no table. + virtual DictionaryPtr tryGetDictionary( + const Context & context, + const String & name) const = 0; + using FilterByNameFunction = std::function; /// Get an iterator that allows you to pass through all the tables. 
/// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. - virtual DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) = 0; + virtual DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) = 0; + + /// Get an iterator to pass through all the dictionaries. + virtual DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) = 0; /// Is the database empty. virtual bool empty(const Context & context) const = 0; @@ -90,17 +144,35 @@ public: const StoragePtr & table, const ASTPtr & query) = 0; + /// Add the dictionary to the database. Record its presence in the metadata. + virtual void createDictionary( + const Context & context, + const String & dictionary_name, + const DictionaryPtr & dict_ptr, + const ASTPtr & query) = 0; + /// Delete the table from the database. Delete the metadata. virtual void removeTable( const Context & context, const String & name) = 0; + /// Delete the dictionary from the database. Delete the metadata. + virtual void removeDictionary( + const Context & context, + const String & dictionary_name) = 0; + /// Add a table to the database, but do not add it to the metadata. The database may not support this method. virtual void attachTable(const String & name, const StoragePtr & table) = 0; + /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method. + virtual void attachDictionary(const String & name, const DictionaryPtr & dictionary) = 0; + /// Forget about the table without deleting it, and return it. The database may not support this method. virtual StoragePtr detachTable(const String & name) = 0; + /// Forget about the dictionary without deleting it, and return it. 
The database may not support this method. + virtual DictionaryPtr detachDictionary(const String & name) = 0; + /// Rename the table and possibly move the table to another database. virtual void renameTable( const Context & /*context*/, @@ -128,7 +200,7 @@ public: } /// Returns time of table's metadata change, 0 if there is no corresponding metadata file. - virtual time_t getTableMetadataModificationTime( + virtual time_t getObjectMetadataModificationTime( const Context & context, const String & name) = 0; @@ -140,6 +212,14 @@ public: return tryGetCreateTableQuery(context, name); } + /// Get the CREATE DICTIONARY query for the dictionary. Returns nullptr if dictionary doesn't exists. + virtual ASTPtr tryGetCreateDictionaryQuery(const Context & context, const String & name) const = 0; + + virtual ASTPtr getCreateDictionaryQuery(const Context & context, const String & name) const + { + return tryGetCreateDictionaryQuery(context, name); + } + /// Get the CREATE DATABASE query for current database. virtual ASTPtr getCreateDatabaseQuery(const Context & context) const = 0; @@ -150,7 +230,7 @@ public: /// Returns metadata path if the database supports it, empty string otherwise virtual String getMetadataPath() const { return {}; } /// Returns metadata path of a concrete table if the database supports it, empty string otherwise - virtual String getTableMetadataPath(const String & /*table_name*/) const { return {}; } + virtual String getObjectMetadataPath(const String & /*table_name*/) const { return {}; } /// Ask all tables to complete the background threads they are using and delete all table objects. 
virtual void shutdown() = 0; diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index 43ae9d5623b..d22ea41fd84 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -20,7 +20,10 @@ void DictionaryFactory::registerLayout(const std::string & layout_type, Creator DictionaryPtr DictionaryFactory::create( - const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context) const + const std::string & name, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const Context & context) const { Poco::Util::AbstractConfiguration::Keys keys; const auto & layout_prefix = config_prefix + ".layout"; @@ -47,6 +50,15 @@ DictionaryPtr DictionaryFactory::create( throw Exception{name + ": unknown dictionary layout type: " + layout_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; } +DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const +{ + (void)(name); + (void)(ast); + (void)(context); + return nullptr; +} + + DictionaryFactory & DictionaryFactory::instance() { static DictionaryFactory ret; diff --git a/dbms/src/Dictionaries/DictionaryFactory.h b/dbms/src/Dictionaries/DictionaryFactory.h index dbfdc563aa4..fd7978f590f 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.h +++ b/dbms/src/Dictionaries/DictionaryFactory.h @@ -1,6 +1,7 @@ #pragma once #include "IDictionary.h" +#include namespace Poco @@ -27,7 +28,15 @@ public: static DictionaryFactory & instance(); - DictionaryPtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context) const; + DictionaryPtr create( + const std::string & name, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const Context & context) const; + + DictionaryPtr 
create(const std::string & name, + const ASTCreateQuery & ast, + const Context & context) const; using Creator = std::function; + const Context & context)>; DictionarySourceFactory(); @@ -42,7 +42,7 @@ public: const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct, - Context & context) const; + const Context & context) const; private: using SourceRegistry = std::unordered_map; diff --git a/dbms/src/Dictionaries/IDictionary.h b/dbms/src/Dictionaries/IDictionary.h index 9ce7c569f75..def5d58e243 100644 --- a/dbms/src/Dictionaries/IDictionary.h +++ b/dbms/src/Dictionaries/IDictionary.h @@ -16,7 +16,7 @@ namespace DB { struct IDictionaryBase; -using DictionaryPtr = std::unique_ptr; +using DictionaryPtr = std::shared_ptr; struct DictionaryStructure; class ColumnString; @@ -57,7 +57,7 @@ struct IDictionaryBase : public IExternalLoadable virtual std::exception_ptr getLastException() const { return {}; } - std::shared_ptr shared_from_this() + DictionaryPtr shared_from_this() { return std::static_pointer_cast(IExternalLoadable::shared_from_this()); } diff --git a/dbms/src/Dictionaries/IDictionary_fwd.h b/dbms/src/Dictionaries/IDictionary_fwd.h new file mode 100644 index 00000000000..362d85fa26d --- /dev/null +++ b/dbms/src/Dictionaries/IDictionary_fwd.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +struct IDictionaryBase; +using DictionaryPtr = std::shared_ptr; +using Dictionaries = std::map; + +} diff --git a/dbms/src/Interpreters/ActionLocksManager.cpp b/dbms/src/Interpreters/ActionLocksManager.cpp index 79578252325..df0907f551b 100644 --- a/dbms/src/Interpreters/ActionLocksManager.cpp +++ b/dbms/src/Interpreters/ActionLocksManager.cpp @@ -23,7 +23,7 @@ template inline void forEachTable(Context & context, F && f) { for (auto & elem : context.getDatabases()) - for (auto iterator = elem.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto 
iterator = elem.second->getTablesIterator(context); iterator->isValid(); iterator->next()) f(iterator->table()); } diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 7cb137e3f68..4fc1988bbef 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -167,7 +167,7 @@ void AsynchronousMetrics::update() /// Lazy database can not contain MergeTree tables if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { ++total_number_of_tables; auto & table = iterator->table(); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index d187f68eda3..c2261b1acc7 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -163,7 +163,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (need_write_metadata) Poco::File(metadata_file_tmp_path).renameTo(metadata_file_path); - database->loadTables(context, has_force_restore_data_flag); + database->loadStoredObjects(context, has_force_restore_data_flag); } catch (...) 
{ diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 7887ebc8892..f606fd3f26b 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -184,7 +184,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(String & database_name, ASTDropQ } else if (kind == ASTDropQuery::Kind::Drop) { - for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { String current_table_name = iterator->name(); executeToTable(database_name, current_table_name, kind, false, false, false); diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index d4cdf10fd63..8b1e4608c4a 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -315,7 +315,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) DatabasePtr & database = elem.second; const String & database_name = elem.first; - for (auto iterator = database->getIterator(system_context); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(system_context); iterator->isValid(); iterator->next()) { if (dynamic_cast(iterator->table().get())) replica_names.emplace_back(database_name, iterator->name()); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 56ab949f30c..deaea288e7b 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -143,7 +143,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & { auto stage_in_source_tables = QueryProcessingStage::FetchColumns; - DatabaseIteratorPtr iterator = getDatabaseIterator(context); + DatabaseTablesIteratorPtr iterator = getDatabaseIterator(context); size_t 
selected_table_size = 0; @@ -353,7 +353,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock, const String & query_id) const { StorageListWithLocks selected_tables; - DatabaseIteratorPtr iterator = getDatabaseIterator(global_context); + DatabaseTablesIteratorPtr iterator = getDatabaseIterator(global_context); auto virtual_column = ColumnString::create(); @@ -387,12 +387,12 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr } -DatabaseIteratorPtr StorageMerge::getDatabaseIterator(const Context & context) const +DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const Context & context) const { checkStackSize(); auto database = context.getDatabase(source_database); auto table_name_match = [this](const String & table_name_) { return table_name_regexp.match(table_name_); }; - return database->getIterator(global_context, table_name_match); + return database->getTablesIterator(global_context, table_name_match); } diff --git a/dbms/src/Storages/StorageMerge.h b/dbms/src/Storages/StorageMerge.h index dbf5d219957..debcb4da58e 100644 --- a/dbms/src/Storages/StorageMerge.h +++ b/dbms/src/Storages/StorageMerge.h @@ -71,7 +71,7 @@ private: template StoragePtr getFirstTable(F && predicate) const; - DatabaseIteratorPtr getDatabaseIterator(const Context & context) const; + DatabaseTablesIteratorPtr getDatabaseIterator(const Context & context) const; protected: StorageMerge( From 2a5713e6e4cd77cfd2dc808a68e2bfbb915ec248 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Oct 2019 23:47:47 +0300 Subject: [PATCH 003/122] Compilable code --- .../programs/server/ReplicasStatusHandler.cpp | 2 +- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Common/XDBCBridgeHelper.h | 4 +- dbms/src/Databases/DatabaseDictionary.cpp | 70 +++++++++++++++++++ dbms/src/Databases/DatabaseLazy.cpp | 58 
+++++++++++++++ dbms/src/Databases/DatabaseLazy.h | 5 +- dbms/src/Databases/DatabaseOnDisk.h | 4 +- dbms/src/Databases/DatabaseOrdinary.cpp | 7 ++ .../ClickHouseDictionarySource.cpp | 4 +- .../Dictionaries/ClickHouseDictionarySource.h | 2 +- .../ExecutableDictionarySource.cpp | 2 +- .../src/Dictionaries/FileDictionarySource.cpp | 2 +- .../src/Dictionaries/HTTPDictionarySource.cpp | 2 +- .../src/Dictionaries/XDBCDictionarySource.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Interpreters/tests/create_query.cpp | 2 +- dbms/src/Interpreters/tests/select_query.cpp | 2 +- .../StorageSystemBuildOptions.generated.cpp | 66 +++++++++++++++++ .../Storages/System/StorageSystemColumns.cpp | 2 +- .../Storages/System/StorageSystemGraphite.cpp | 2 +- .../System/StorageSystemMutations.cpp | 2 +- .../System/StorageSystemPartsBase.cpp | 2 +- .../Storages/System/StorageSystemReplicas.cpp | 2 +- .../System/StorageSystemReplicationQueue.cpp | 2 +- .../Storages/System/StorageSystemTables.cpp | 8 +-- .../src/TableFunctions/TableFunctionMerge.cpp | 2 +- 27 files changed, 233 insertions(+), 28 deletions(-) create mode 100644 dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp diff --git a/dbms/programs/server/ReplicasStatusHandler.cpp b/dbms/programs/server/ReplicasStatusHandler.cpp index cbbdacccc56..22e059d9ccc 100644 --- a/dbms/programs/server/ReplicasStatusHandler.cpp +++ b/dbms/programs/server/ReplicasStatusHandler.cpp @@ -44,7 +44,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { auto & table = iterator->table(); StorageReplicatedMergeTree * table_replicated = dynamic_cast(table.get()); diff --git a/dbms/src/Common/ErrorCodes.cpp 
b/dbms/src/Common/ErrorCodes.cpp index d931f4ada1c..60164a3f49f 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -460,6 +460,7 @@ namespace ErrorCodes extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 500; + extern const int DICTIONARY_ALREADY_EXISTS = 501; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/XDBCBridgeHelper.h b/dbms/src/Common/XDBCBridgeHelper.h index 9b4cb96b468..c8ecb30ec68 100644 --- a/dbms/src/Common/XDBCBridgeHelper.h +++ b/dbms/src/Common/XDBCBridgeHelper.h @@ -68,7 +68,7 @@ protected: public: using Configuration = Poco::Util::AbstractConfiguration; - Context & context; + const Context & context; const Configuration & config; static constexpr inline auto DEFAULT_HOST = "localhost"; @@ -79,7 +79,7 @@ public: static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; static constexpr inline auto PING_OK_ANSWER = "Ok."; - XDBCBridgeHelper(Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) + XDBCBridgeHelper(const Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) : http_timeout(http_timeout_), connection_string(connection_string_), context(global_context_), config(context.getConfigRef()) { size_t bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT); diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 4db63ca7b44..867d3dfe395 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; extern const int SYNTAX_ERROR; + extern const int UNSUPPORTED_METHOD; } 
DatabaseDictionary::DatabaseDictionary(const String & name_) @@ -64,6 +65,75 @@ bool DatabaseDictionary::isTableExist( return context.getExternalDictionariesLoader().getCurrentStatus(table_name) != ExternalLoader::Status::NOT_EXIST; } + +bool DatabaseDictionary::isDictionaryExist( + const Context & /*context*/, + const String & /*table_name*/) const +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + +DictionaryPtr DatabaseDictionary::tryGetDictionary( + const Context & /*context*/, + const String & /*dictionary_name*/) const +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +DatabaseDictionariesIteratorPtr DatabaseDictionary::getDictionariesIterator( + const Context & /*context*/, + const FilterByNameFunction & /*filter_by_dictionary_name*/) +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +void DatabaseDictionary::createDictionary( + const Context & /*context*/, + const String & /*dictionary_name*/, + const DictionaryPtr & /*dict_ptr*/, + const ASTPtr & /*query*/) +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + +void DatabaseDictionary::removeDictionary( + const Context & /*context*/, + const String & /*table_name*/) +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + +void DatabaseDictionary::attachDictionary( + const String & /*dictionary_name*/, + const DictionaryPtr & /*table*/) +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + +DictionaryPtr DatabaseDictionary::detachDictionary( + const String & /*dictionary_name*/) +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +ASTPtr DatabaseDictionary::tryGetCreateDictionaryQuery( + const Context & 
/*context*/, + const String & /*table_name*/) const +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +ASTPtr DatabaseDictionary::getCreateDictionaryQuery( + const Context & /*context*/, + const String & /*table_name*/) const +{ + throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); +} + StoragePtr DatabaseDictionary::tryGetTable( const Context & context, const String & table_name) const diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index fcf023d369d..9cd6294b2e3 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -74,6 +74,16 @@ void DatabaseLazy::createTable( } +void DatabaseLazy::createDictionary( + const Context & /*context*/, + const String & /*dictionary_name*/, + const DictionaryPtr & /*dict_ptr*/, + const ASTPtr & /*query*/) +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + + void DatabaseLazy::removeTable( const Context & context, const String & table_name) @@ -82,6 +92,54 @@ void DatabaseLazy::removeTable( DatabaseOnDisk::removeTable(*this, context, table_name, log); } + +void DatabaseLazy::removeDictionary(const Context & /*context*/, const String & /*table_name*/) +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + +ASTPtr DatabaseLazy::getCreateDictionaryQuery(const Context & /*context*/, const String & /*table_name*/) const +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + +ASTPtr DatabaseLazy::tryGetCreateDictionaryQuery(const Context & /*context*/, const String & /*table_name*/) const +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + +bool DatabaseLazy::isDictionaryExist(const Context & /*context*/, const String & /*table_name*/) 
const +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +DictionaryPtr DatabaseLazy::tryGetDictionary(const Context & /*context*/, const String & /*dictionary_name*/) const +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( + const Context & /*context*/, + const FilterByNameFunction & /*filter_by_dictionary_name*/) +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + + +void DatabaseLazy::attachDictionary( + const String & /*dictionary_name*/, + const DictionaryPtr & /*table*/) +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + +DictionaryPtr DatabaseLazy::detachDictionary(const String & /*dictionary_name*/) +{ + throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); +} + void DatabaseLazy::renameTable( const Context & context, const String & table_name, diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index a4328f6b46b..16a2b859519 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -173,7 +173,10 @@ private: class DatabaseLazyIterator final : public IDatabaseTablesIterator { public: - DatabaseLazyIterator(DatabaseLazy & database_, const Context & context_, Strings && table_names_); + DatabaseLazyIterator( + DatabaseLazy & database_, + const Context & context_, + Strings && table_names_); void next() override; bool isValid() const override; diff --git a/dbms/src/Databases/DatabaseOnDisk.h b/dbms/src/Databases/DatabaseOnDisk.h index ad4bf4d2463..26cdcd0f9b6 100644 --- a/dbms/src/Databases/DatabaseOnDisk.h +++ b/dbms/src/Databases/DatabaseOnDisk.h @@ -12,7 +12,7 @@ namespace DB namespace detail { - String getTableMetadataPath(const String & base_path, 
const String & table_name); + String getObjectMetadataPath(const String & base_path, const String & dictionary_name); String getDatabaseMetadataPath(const String & base_path); ASTPtr getQueryFromMetadata(const String & metadata_path, bool throw_on_error = true); ASTPtr getCreateQueryFromMetadata(const String & metadata_path, const String & database, bool throw_on_error); @@ -170,7 +170,7 @@ void DatabaseOnDisk::renameTable( throw Exception{Exception::CreateFromPoco, e}; } - ASTPtr ast = detail::getQueryFromMetadata(detail::getTableMetadataPath(database.getMetadataPath(), table_name)); + ASTPtr ast = detail::getQueryFromMetadata(detail::getObjectMetadataPath(database.getMetadataPath(), table_name)); if (!ast) throw Exception("There is no metadata file for table " + backQuote(table_name) + ".", ErrorCodes::FILE_DOESNT_EXIST); ast->as().table = to_table_name; diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 2eac82bba82..75c2db29495 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -242,6 +242,13 @@ void DatabaseOrdinary::removeTable( DatabaseOnDisk::removeTable(*this, context, table_name, log); } +void DatabaseOrdinary::removeDictionary( + const Context & context, + const String & table_name) +{ + DatabaseOnDisk::removeDictionary(*this, context, table_name, log); +} + void DatabaseOrdinary::renameTable( const Context & context, const String & table_name, diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index a3b4e8c5cfb..6586f979687 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -52,7 +52,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Block & sample_block_, - Context & context_) + const Context & context_) : 
update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , host{config.getString(config_prefix + ".host")} @@ -206,7 +206,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr + const Context & context) -> DictionarySourcePtr { return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context); }; diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index 3df962708bd..84bbd78b93a 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -23,7 +23,7 @@ public: const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Block & sample_block_, - Context & context); + const Context & context); /// copy-constructor is provided in order to support cloneability ClickHouseDictionarySource(const ClickHouseDictionarySource & other); diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index d76de3abe0e..68c48d2b457 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -217,7 +217,7 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr + const Context & context) -> DictionarySourcePtr { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `executable` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/FileDictionarySource.cpp b/dbms/src/Dictionaries/FileDictionarySource.cpp index 
1505c2629f6..1c9f9d60ce3 100644 --- a/dbms/src/Dictionaries/FileDictionarySource.cpp +++ b/dbms/src/Dictionaries/FileDictionarySource.cpp @@ -56,7 +56,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr + const Context & context) -> DictionarySourcePtr { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index 184470ae7c5..2a8269d4047 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -188,7 +188,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr + const Context & context) -> DictionarySourcePtr { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/XDBCDictionarySource.cpp b/dbms/src/Dictionaries/XDBCDictionarySource.cpp index 627092844ec..12b6b72fceb 100644 --- a/dbms/src/Dictionaries/XDBCDictionarySource.cpp +++ b/dbms/src/Dictionaries/XDBCDictionarySource.cpp @@ -238,7 +238,7 @@ void registerDictionarySourceXDBC(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr { + const Context & context) -> DictionarySourcePtr { #if USE_POCO_SQLODBC || USE_POCO_DATAODBC BridgeHelperPtr bridge = std::make_shared>( context, context.getSettings().http_receive_timeout, config.getString(config_prefix + 
".odbc.connection_string")); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index fef48898828..728f4bb5f4d 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -2039,7 +2039,7 @@ void Context::dropCompiledExpressionCache() const #endif -void Context::addXDBCBridgeCommand(std::unique_ptr cmd) +void Context::addXDBCBridgeCommand(std::unique_ptr cmd) const { auto lock = getLock(); shared->bridge_commands.emplace_back(std::move(cmd)); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index bba4fdb18e9..1a851961b7f 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -544,7 +544,7 @@ public: #endif /// Add started bridge command. It will be killed after context destruction - void addXDBCBridgeCommand(std::unique_ptr cmd); + void addXDBCBridgeCommand(std::unique_ptr cmd) const; IHostContextPtr & getHostContext(); const IHostContextPtr & getHostContext() const; diff --git a/dbms/src/Interpreters/tests/create_query.cpp b/dbms/src/Interpreters/tests/create_query.cpp index 47e1f202db7..fc487f4b7bb 100644 --- a/dbms/src/Interpreters/tests/create_query.cpp +++ b/dbms/src/Interpreters/tests/create_query.cpp @@ -84,7 +84,7 @@ try context.setPath("./"); auto database = std::make_shared("test", "./metadata/test/", context); context.addDatabase("test", database); - database->loadTables(context, false); + database->loadStoredObjects(context, false); context.setCurrentDatabase("test"); InterpreterCreateQuery interpreter(ast, context); diff --git a/dbms/src/Interpreters/tests/select_query.cpp b/dbms/src/Interpreters/tests/select_query.cpp index 1283ae6e659..54613fffd8e 100644 --- a/dbms/src/Interpreters/tests/select_query.cpp +++ b/dbms/src/Interpreters/tests/select_query.cpp @@ -39,7 +39,7 @@ try DatabasePtr system = std::make_shared("system", "./metadata/system/", context); context.addDatabase("system", system); - system->loadTables(context, false); 
+ system->loadStoredObjects(context, false); attachSystemTablesLocal(*context.getDatabase("system")); context.setCurrentDatabase("default"); diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp new file mode 100644 index 00000000000..10b7a4cafdb --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp @@ -0,0 +1,66 @@ +// .cpp autogenerated by cmake + +#define BUILD_DETERMINISTIC 0 + +const char * auto_config_build[] +{ + "VERSION_FULL", "ClickHouse 19.15.1.1", + "VERSION_DESCRIBE", "v19.15.1.1-prestable", + "VERSION_INTEGER", "19015001", + +#if BUILD_DETERMINISTIC + "SYSTEM", "Linux", +#else + "VERSION_GITHASH", "6f1a8c37abe6ee4e7ee74c0b5cb9c05a87417b61", + "VERSION_REVISION", "54426", + "BUILD_DATE", "2019-09-20", + "SYSTEM", "Linux-5.0.0-23-generic", +#endif + + "BUILD_TYPE", "RELWITHDEBINFO", + "SYSTEM_PROCESSOR", "x86_64", + "LIBRARY_ARCHITECTURE", "x86_64-linux-gnu", + "CMAKE_VERSION", "3.15.3", + "C_COMPILER", "/usr/bin/gcc-9", + "C_COMPILER_VERSION", "9.1.0", + "CXX_COMPILER", "/usr/bin/g++-9", + "CXX_COMPILER_VERSION", "9.1.0", + "C_FLAGS", " -fdiagnostics-color=always -pipe -msse4.1 -msse4.2 -mpopcnt -fno-omit-frame-pointer -Wall -O2 -g -DNDEBUG -O3 ", + "CXX_FLAGS", " -fdiagnostics-color=always -fsized-deallocation -pipe -msse4.1 -msse4.2 -mpopcnt -fno-omit-frame-pointer -Wall -Wnon-virtual-dtor -Wno-array-bounds -O2 -g -DNDEBUG -O3 ", + "LINK_FLAGS", " -fuse-ld=gold -rdynamic -Wl,--no-undefined", + "BUILD_COMPILE_DEFINITIONS", "", + "BUILD_INCLUDE_DIRECTORIES", "", + "STATIC", "ON", + "SPLIT_BINARY", "", + "USE_EMBEDDED_COMPILER", "1", + "USE_INTERNAL_MEMCPY", "ON", + "USE_GLIBC_COMPATIBILITY", "ON", + "USE_JEMALLOC", "1", + "USE_TCMALLOC", "", + "USE_MIMALLOC", "", + "USE_UNWIND", "ON", + "USE_ICU", "1", + "USE_H3", "1", + "USE_MYSQL", "1", + "USE_RE2_ST", "1", + "USE_VECTORCLASS", "", + "USE_LIBGSASL", "1", + "USE_RDKAFKA", "1", + 
"USE_CAPNP", "1", + "USE_POCO_SQLODBC", "", + "USE_POCO_DATAODBC", "1", + "USE_POCO_MONGODB", "1", + "USE_POCO_NETSSL", "1", + "USE_BASE64", "1", + "USE_XXHASH", "1", + "USE_HDFS", "1", + "USE_SNAPPY", "1", + "USE_PARQUET", "1", + "USE_PROTOBUF", "1", + "USE_BROTLI", "1", + "USE_SSL", "1", + "USE_HYPERSCAN", "1", + "USE_SIMDJSON", "ON", + + nullptr, nullptr +}; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index c1d5c827f30..c42e7ec586d 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -292,7 +292,7 @@ BlockInputStreams StorageSystemColumns::read( const DatabasePtr database = databases.at(database_name); offsets[i] = i ? offsets[i - 1] : 0; - for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { const String & table_name = iterator->name(); storages.emplace(std::piecewise_construct, diff --git a/dbms/src/Storages/System/StorageSystemGraphite.cpp b/dbms/src/Storages/System/StorageSystemGraphite.cpp index b8f04103e91..36839e06196 100644 --- a/dbms/src/Storages/System/StorageSystemGraphite.cpp +++ b/dbms/src/Storages/System/StorageSystemGraphite.cpp @@ -36,7 +36,7 @@ StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context & if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { auto & table = iterator->table(); diff --git a/dbms/src/Storages/System/StorageSystemMutations.cpp b/dbms/src/Storages/System/StorageSystemMutations.cpp index 7bce6628c0e..0af32466d88 100644 --- a/dbms/src/Storages/System/StorageSystemMutations.cpp +++ b/dbms/src/Storages/System/StorageSystemMutations.cpp 
@@ -43,7 +43,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex if (db.second->getEngineName() == "Lazy") continue; if (context.hasDatabaseAccessRights(db.first)) - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) if (dynamic_cast(iterator->table().get())) merge_tree_tables[db.first][iterator->name()] = iterator->table(); } diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index aa6c61f202f..bc260cd72c6 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -106,7 +106,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const continue; offsets[i] = i ? offsets[i - 1] : 0; - for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { String table_name = iterator->name(); StoragePtr storage = iterator->table(); diff --git a/dbms/src/Storages/System/StorageSystemReplicas.cpp b/dbms/src/Storages/System/StorageSystemReplicas.cpp index 3e5c11c7787..3e319e19bd7 100644 --- a/dbms/src/Storages/System/StorageSystemReplicas.cpp +++ b/dbms/src/Storages/System/StorageSystemReplicas.cpp @@ -71,7 +71,7 @@ BlockInputStreams StorageSystemReplicas::read( continue; if (context.hasDatabaseAccessRights(db.first)) { - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) if (dynamic_cast(iterator->table().get())) replicated_tables[db.first][iterator->name()] = iterator->table(); } diff --git a/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp 
b/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp index e29900295d8..5d6aa412152 100644 --- a/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -57,7 +57,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const if (context.hasDatabaseAccessRights(db.first)) { - for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) if (dynamic_cast(iterator->table().get())) replicated_tables[db.first][iterator->name()] = iterator->table(); } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 27861fcb909..9ac3245a752 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -189,7 +189,7 @@ protected: } if (!tables_it || !tables_it->isValid()) - tables_it = database->getIterator(context); + tables_it = database->getTablesIterator(context); const bool need_lock_structure = needLockStructure(database, header); @@ -251,10 +251,10 @@ protected: } if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database->getTableMetadataPath(table_name)); + res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(database->getTableMetadataModificationTime(context, table_name))); + res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(context, table_name))); { Array dependencies_table_name_array; @@ -372,7 +372,7 @@ private: UInt64 max_block_size; ColumnPtr databases; size_t database_idx = 0; - DatabaseIteratorPtr tables_it; + DatabaseTablesIteratorPtr tables_it; const Context context; bool done = false; DatabasePtr database; diff --git 
a/dbms/src/TableFunctions/TableFunctionMerge.cpp b/dbms/src/TableFunctions/TableFunctionMerge.cpp index b5d15707e89..0cae1cda987 100644 --- a/dbms/src/TableFunctions/TableFunctionMerge.cpp +++ b/dbms/src/TableFunctions/TableFunctionMerge.cpp @@ -33,7 +33,7 @@ static NamesAndTypesList chooseColumns(const String & source_database, const Str { auto database = context.getDatabase(source_database); - auto iterator = database->getIterator(context, table_name_match); + auto iterator = database->getTablesIterator(context, table_name_match); if (iterator->isValid()) any_table = iterator->table(); From 77c6d4d48f2d3ba123b56f1ab1d1f65f87b271a8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 12:05:12 +0300 Subject: [PATCH 004/122] Changes --- contrib/mariadb-connector-c | 2 +- dbms/src/Databases/DatabaseDictionary.cpp | 8 ++++---- dbms/src/Databases/DatabaseLazy.cpp | 18 +++++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index c6503d3acc8..18016300b00 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit c6503d3acc85ca1a7f5e7e38b605d7c9410aac1e +Subproject commit 18016300b00825a3fcbc6fb2aa37ac3e51416f71 diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 867d3dfe395..403725a7234 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -70,14 +70,14 @@ bool DatabaseDictionary::isDictionaryExist( const Context & /*context*/, const String & /*table_name*/) const { - throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); + return false; } DictionaryPtr DatabaseDictionary::tryGetDictionary( const Context & /*context*/, const String & /*dictionary_name*/) const { - throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); + return nullptr; } 
@@ -85,7 +85,7 @@ DatabaseDictionariesIteratorPtr DatabaseDictionary::getDictionariesIterator( const Context & /*context*/, const FilterByNameFunction & /*filter_by_dictionary_name*/) { - throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); + return nullptr; } @@ -123,7 +123,7 @@ ASTPtr DatabaseDictionary::tryGetCreateDictionaryQuery( const Context & /*context*/, const String & /*table_name*/) const { - throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); + return nullptr; } diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 9cd6294b2e3..89b8b51fd85 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -92,34 +92,35 @@ void DatabaseLazy::removeTable( DatabaseOnDisk::removeTable(*this, context, table_name, log); } - -void DatabaseLazy::removeDictionary(const Context & /*context*/, const String & /*table_name*/) +void DatabaseLazy::removeDictionary( + const Context & /*context*/, + const String & /*table_name*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } -ASTPtr DatabaseLazy::getCreateDictionaryQuery(const Context & /*context*/, const String & /*table_name*/) const +ASTPtr DatabaseLazy::getCreateDictionaryQuery( + const Context & /*context*/, + const String & /*table_name*/) const { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } ASTPtr DatabaseLazy::tryGetCreateDictionaryQuery(const Context & /*context*/, const String & /*table_name*/) const { - throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); + return nullptr; } bool DatabaseLazy::isDictionaryExist(const Context & /*context*/, const String & /*table_name*/) const { - throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); + return false; } 
- DictionaryPtr DatabaseLazy::tryGetDictionary(const Context & /*context*/, const String & /*dictionary_name*/) const { - throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); + return nullptr; } - DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( const Context & /*context*/, const FilterByNameFunction & /*filter_by_dictionary_name*/) @@ -127,7 +128,6 @@ DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } - void DatabaseLazy::attachDictionary( const String & /*dictionary_name*/, const DictionaryPtr & /*table*/) From 7159cde16089ecb5f4945f2a0a4fdad36e627a5d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 12:07:44 +0300 Subject: [PATCH 005/122] Remove accident change --- .../StorageSystemBuildOptions.generated.cpp | 66 ------------------- 1 file changed, 66 deletions(-) delete mode 100644 dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp deleted file mode 100644 index 10b7a4cafdb..00000000000 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// .cpp autogenerated by cmake - -#define BUILD_DETERMINISTIC 0 - -const char * auto_config_build[] -{ - "VERSION_FULL", "ClickHouse 19.15.1.1", - "VERSION_DESCRIBE", "v19.15.1.1-prestable", - "VERSION_INTEGER", "19015001", - -#if BUILD_DETERMINISTIC - "SYSTEM", "Linux", -#else - "VERSION_GITHASH", "6f1a8c37abe6ee4e7ee74c0b5cb9c05a87417b61", - "VERSION_REVISION", "54426", - "BUILD_DATE", "2019-09-20", - "SYSTEM", "Linux-5.0.0-23-generic", -#endif - - "BUILD_TYPE", "RELWITHDEBINFO", - "SYSTEM_PROCESSOR", "x86_64", - "LIBRARY_ARCHITECTURE", "x86_64-linux-gnu", - "CMAKE_VERSION", "3.15.3", - "C_COMPILER", "/usr/bin/gcc-9", - 
"C_COMPILER_VERSION", "9.1.0", - "CXX_COMPILER", "/usr/bin/g++-9", - "CXX_COMPILER_VERSION", "9.1.0", - "C_FLAGS", " -fdiagnostics-color=always -pipe -msse4.1 -msse4.2 -mpopcnt -fno-omit-frame-pointer -Wall -O2 -g -DNDEBUG -O3 ", - "CXX_FLAGS", " -fdiagnostics-color=always -fsized-deallocation -pipe -msse4.1 -msse4.2 -mpopcnt -fno-omit-frame-pointer -Wall -Wnon-virtual-dtor -Wno-array-bounds -O2 -g -DNDEBUG -O3 ", - "LINK_FLAGS", " -fuse-ld=gold -rdynamic -Wl,--no-undefined", - "BUILD_COMPILE_DEFINITIONS", "", - "BUILD_INCLUDE_DIRECTORIES", "", - "STATIC", "ON", - "SPLIT_BINARY", "", - "USE_EMBEDDED_COMPILER", "1", - "USE_INTERNAL_MEMCPY", "ON", - "USE_GLIBC_COMPATIBILITY", "ON", - "USE_JEMALLOC", "1", - "USE_TCMALLOC", "", - "USE_MIMALLOC", "", - "USE_UNWIND", "ON", - "USE_ICU", "1", - "USE_H3", "1", - "USE_MYSQL", "1", - "USE_RE2_ST", "1", - "USE_VECTORCLASS", "", - "USE_LIBGSASL", "1", - "USE_RDKAFKA", "1", - "USE_CAPNP", "1", - "USE_POCO_SQLODBC", "", - "USE_POCO_DATAODBC", "1", - "USE_POCO_MONGODB", "1", - "USE_POCO_NETSSL", "1", - "USE_BASE64", "1", - "USE_XXHASH", "1", - "USE_HDFS", "1", - "USE_SNAPPY", "1", - "USE_PARQUET", "1", - "USE_PROTOBUF", "1", - "USE_BROTLI", "1", - "USE_SSL", "1", - "USE_HYPERSCAN", "1", - "USE_SIMDJSON", "ON", - - nullptr, nullptr -}; From ae42dc0d4c695644f6009c5d5bae34a5e00a9608 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 16:21:52 +0300 Subject: [PATCH 006/122] Attach detach and introspection queries for dictionaries --- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Interpreters/Context.cpp | 37 ++++--- dbms/src/Interpreters/Context.h | 3 +- .../Interpreters/InterpreterCreateQuery.cpp | 35 ++++++- .../src/Interpreters/InterpreterCreateQuery.h | 1 + .../src/Interpreters/InterpreterDropQuery.cpp | 63 +++++++++++- dbms/src/Interpreters/InterpreterDropQuery.h | 2 + .../Interpreters/InterpreterExistsQuery.cpp | 24 ++++- dbms/src/Interpreters/InterpreterFactory.cpp | 8 ++ .../InterpreterShowCreateQuery.cpp | 36 
++++--- .../InterpreterShowTablesQuery.cpp | 13 ++- dbms/src/Parsers/ASTCreateQuery.cpp | 8 +- dbms/src/Parsers/ASTDictionary.cpp | 17 ++-- dbms/src/Parsers/ASTShowTablesQuery.cpp | 4 +- dbms/src/Parsers/ASTShowTablesQuery.h | 1 + dbms/src/Parsers/ParserCreateQuery.cpp | 29 ++++-- dbms/src/Parsers/ParserShowTablesQuery.cpp | 50 +++++----- .../System/StorageSystemDictionaries.cpp | 98 +++++++++++++++---- .../Storages/System/StorageSystemTables.cpp | 6 +- 19 files changed, 331 insertions(+), 105 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 60164a3f49f..608e2672dcc 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -461,6 +461,7 @@ namespace ErrorCodes extern const int INTERNAL_REDIS_ERROR = 484; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 500; extern const int DICTIONARY_ALREADY_EXISTS = 501; + extern const int UNKNOWN_DICTIONARY = 502; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 728f4bb5f4d..6636766259f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -792,6 +792,16 @@ bool Context::isTableExist(const String & database_name, const String & table_na && it->second->isTableExist(*this, table_name); } +bool Context::isDictionaryExists(const String & database_name, const String & dictionary_name) const +{ + auto lock = getLock(); + + String db = resolveDatabase(database_name, current_database); + checkDatabaseAccessRightsImpl(db); + + Databases::const_iterator it = shared->databases.find(db); + return shared->databases.end() != it && it->second->isDictionaryExist(*this, dictionary_name); +} bool Context::isDatabaseExist(const String & database_name) const { @@ -807,22 +817,6 @@ bool Context::isExternalTableExist(const String & table_name) const } -void Context::assertTableExists(const String & database_name, const 
String & table_name) const -{ - auto lock = getLock(); - - String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRightsImpl(db); - - Databases::const_iterator it = shared->databases.find(db); - if (shared->databases.end() == it) - throw Exception("Database " + backQuoteIfNeed(db) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE); - - if (!it->second->isTableExist(*this, table_name)) - throw Exception("Table " + backQuoteIfNeed(db) + "." + backQuoteIfNeed(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); -} - - void Context::assertTableDoesntExist(const String & database_name, const String & table_name, bool check_database_access_rights) const { auto lock = getLock(); @@ -1063,6 +1057,17 @@ ASTPtr Context::getCreateTableQuery(const String & database_name, const String & return shared->databases[db]->getCreateTableQuery(*this, table_name); } + +ASTPtr Context::getCreateDictionaryQuery(const String & database_name, const String & dictionary_name) const +{ + auto lock = getLock(); + + String db = resolveDatabase(database_name, current_database); + assertDatabaseExists(db); + + return shared->databases[db]->getCreateDictionaryQuery(*this, dictionary_name); +} + ASTPtr Context::getCreateExternalTableQuery(const String & table_name) const { TableAndCreateASTs::const_iterator jt = external_tables.find(table_name); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 1a851961b7f..2655a35740b 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -248,9 +248,9 @@ public: /// Checking the existence of the table/database. Database can be empty - in this case the current database is used. 
bool isTableExist(const String & database_name, const String & table_name) const; bool isDatabaseExist(const String & database_name) const; + bool isDictionaryExists(const String & database_name, const String & dictionary_name) const; bool isExternalTableExist(const String & table_name) const; bool hasDatabaseAccessRights(const String & database_name) const; - void assertTableExists(const String & database_name, const String & table_name) const; bool hasDictionaryAccessRights(const String & dictionary_name) const; @@ -355,6 +355,7 @@ public: ASTPtr getCreateTableQuery(const String & database_name, const String & table_name) const; ASTPtr getCreateExternalTableQuery(const String & table_name) const; ASTPtr getCreateDatabaseQuery(const String & database_name) const; + ASTPtr getCreateDictionaryQuery(const String & database_name, const String & dictionary_name) const; const DatabasePtr getDatabase(const String & database_name) const; DatabasePtr getDatabase(const String & database_name); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index c2261b1acc7..55b407f6052 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -43,6 +43,8 @@ #include #include +#include + #include #include @@ -69,6 +71,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_DEFAULT_VALUE; extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; + extern const int DICTIONARY_ALREADY_EXISTS; } @@ -700,6 +703,32 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; } +BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create) +{ + String dictionary_name = create.table; + + String database_name = !create.database.empty() ? 
create.database : context.getCurrentDatabase(); + + auto guard = context.getDDLGuard(database_name, dictionary_name); + DatabasePtr database = context.getDatabase(database_name); + + if (database->isDictionaryExist(context, dictionary_name)) + { + if (create.if_not_exists) + return {}; + else + throw Exception( + "Dictionary " + database_name + "." + dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); + } + + auto res = DictionaryFactory::instance().create(dictionary_name, create, context.getGlobalContext()); + if (create.attach) + database->attachDictionary(dictionary_name, res); + else + database->createDictionary(context, dictionary_name, res, query_ptr); + + return {}; +} BlockIO InterpreterCreateQuery::execute() { @@ -709,11 +738,11 @@ BlockIO InterpreterCreateQuery::execute() /// CREATE|ATTACH DATABASE if (!create.database.empty() && create.table.empty()) - { return createDatabase(create); - } - else + else if (!create.is_dictionary) return createTable(create); + else + return createDictionary(create); } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 2472f0321e1..fe32f44b84e 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -51,6 +51,7 @@ public: private: BlockIO createDatabase(ASTCreateQuery & create); BlockIO createTable(ASTCreateQuery & create); + BlockIO createDictionary(ASTCreateQuery & create); /// Calculate list of columns, constraints, indices, etc... of table and return columns. 
ColumnsDescription setProperties(ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const; diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index f606fd3f26b..15799e5df26 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int UNKNOWN_TABLE; extern const int QUERY_IS_PROHIBITED; + extern const int UNKNOWN_DICTIONARY; } @@ -39,15 +40,26 @@ BlockIO InterpreterDropQuery::execute() return executeDDLQueryOnCluster(query_ptr, context, {drop.database}); if (!drop.table.empty()) - return executeToTable(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); + { + if (!drop.is_dictionary) + return executeToTable(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); + else + return executeToDictionary(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); + } else if (!drop.database.empty()) return executeToDatabase(drop.database, drop.kind, drop.if_exists); else - throw Exception("Database and table names is empty.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Nothing to drop, both names are empty.", ErrorCodes::LOGICAL_ERROR); } -BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & table_name, ASTDropQuery::Kind kind, bool if_exists, bool if_temporary, bool no_ddl_lock) +BlockIO InterpreterDropQuery::executeToTable( + String & database_name_, + String & table_name, + ASTDropQuery::Kind kind, + bool if_exists, + bool if_temporary, + bool no_ddl_lock) { if (if_temporary || database_name_.empty()) { @@ -95,6 +107,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t database_and_table.first->getMetadataPath() + escapeForFileName(database_and_table.second->getTableName()); + /// 
NOTE seems like duplication of the code inside removeTable method of database const auto prev_metadata_name = metadata_file_without_extension + ".sql"; const auto drop_metadata_name = metadata_file_without_extension + ".sql.tmp_drop"; @@ -134,6 +147,50 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t return {}; } + +BlockIO InterpreterDropQuery::executeToDictionary( + String & database_name_, + String & dictionary_name, + ASTDropQuery::Kind kind, + bool if_exists, + bool is_temporary, + bool no_ddl_lock) +{ + if (is_temporary) + throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + + String database_name = database_name_.empty() ? context.getCurrentDatabase() : database_name_; + + auto ddl_guard = (!no_ddl_lock ? context.getDDLGuard(database_name, dictionary_name) : nullptr); + + DatabasePtr database = tryGetDatabase(database_name, false); + + if (!database || !database->isDictionaryExist(context, dictionary_name)) + { + if (!if_exists) + throw Exception( + "Dictionary " + backQuoteIfNeed(database_name) + "." 
+ backQuoteIfNeed(dictionary_name) + " doesn't exist.", + ErrorCodes::UNKNOWN_DICTIONARY); + else + return {}; + } + + if (kind == ASTDropQuery::Kind::Detach) + { + /// Drop dictionary from memory, don't touch data and metadata + database->detachDictionary(dictionary_name); + } + else if (kind == ASTDropQuery::Kind::Truncate) + { + throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR); + } + else if (kind == ASTDropQuery::Kind::Drop) + { + database->removeDictionary(context, dictionary_name); + } + return {}; +} + BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDropQuery::Kind kind) { if (kind == ASTDropQuery::Kind::Detach) diff --git a/dbms/src/Interpreters/InterpreterDropQuery.h b/dbms/src/Interpreters/InterpreterDropQuery.h index 8ca91610cbb..114f2af5f85 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.h +++ b/dbms/src/Interpreters/InterpreterDropQuery.h @@ -32,6 +32,8 @@ private: BlockIO executeToTable(String & database_name, String & table_name, ASTDropQuery::Kind kind, bool if_exists, bool if_temporary, bool no_ddl_lock); + BlockIO executeToDictionary(String & database_name, String & table_name, ASTDropQuery::Kind kind, bool if_exists, bool if_temporary, bool no_ddl_lock); + DatabasePtr tryGetDatabase(String & database_name, bool exists); DatabaseAndTable tryGetDatabaseAndTable(String & database_name, String & table_name, bool if_exists); diff --git a/dbms/src/Interpreters/InterpreterExistsQuery.cpp b/dbms/src/Interpreters/InterpreterExistsQuery.cpp index 57682cbe30b..3d3b41a1818 100644 --- a/dbms/src/Interpreters/InterpreterExistsQuery.cpp +++ b/dbms/src/Interpreters/InterpreterExistsQuery.cpp @@ -13,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + BlockIO InterpreterExistsQuery::execute() { BlockIO res; @@ -32,11 +37,24 @@ Block InterpreterExistsQuery::getSampleBlock() BlockInputStreamPtr InterpreterExistsQuery::executeImpl() { - const auto & ast = 
query_ptr->as(); - bool res = ast.temporary ? context.isExternalTableExist(ast.table) : context.isTableExist(ast.database, ast.table); + ASTQueryWithTableAndOutput * exists_query; + bool result = false; + if (exists_query = query_ptr->as(); exists_query) + { + if (exists_query->temporary) + result = context.isExternalTableExist(exists_query->table); + else + result = context.isTableExist(exists_query->database, exists_query->table); + } + else if (exists_query = query_ptr->as(); exists_query) + { + if (exists_query->temporary) + throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + result = context.isDictionaryExists(exists_query->database, exists_query->table); + } return std::make_shared(Block{{ - ColumnUInt8::create(1, res), + ColumnUInt8::create(1, result), std::make_shared(), "result" }}); } diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 6c9bd314b88..d27c9c8baeb 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -137,6 +137,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); @@ -145,6 +149,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/dbms/src/Interpreters/InterpreterShowCreateQuery.cpp b/dbms/src/Interpreters/InterpreterShowCreateQuery.cpp index 74299ffaf4a..1bc253c8aaf 100644 --- a/dbms/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -42,22 +42,30 @@ Block InterpreterShowCreateQuery::getSampleBlock() BlockInputStreamPtr 
InterpreterShowCreateQuery::executeImpl() { - /// FIXME: try to prettify this cast using `as<>()` - const auto & ast = dynamic_cast(*query_ptr); - - if (ast.temporary && !ast.database.empty()) - throw Exception("Temporary databases are not possible.", ErrorCodes::SYNTAX_ERROR); - ASTPtr create_query; - if (ast.temporary) - create_query = context.getCreateExternalTableQuery(ast.table); - else if (ast.table.empty()) - create_query = context.getCreateDatabaseQuery(ast.database); - else - create_query = context.getCreateTableQuery(ast.database, ast.table); + ASTQueryWithTableAndOutput * show_query; + if (show_query = query_ptr->as(); show_query) + { + if (show_query->temporary) + create_query = context.getCreateExternalTableQuery(show_query->table); + else + create_query = context.getCreateTableQuery(show_query->database, show_query->table); + } + else if (show_query = query_ptr->as(); show_query) + { + if (show_query->temporary) + throw Exception("Temporary databases are not possible.", ErrorCodes::SYNTAX_ERROR); + create_query = context.getCreateDatabaseQuery(show_query->database); + } + else if (show_query = query_ptr->as(); show_query) + { + if (show_query->temporary) + throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + create_query = context.getCreateDictionaryQuery(show_query->database, show_query->table); + } - if (!create_query && ast.temporary) - throw Exception("Unable to show the create query of " + ast.table + ". Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY); + if (!create_query && show_query->temporary) + throw Exception("Unable to show the create query of " + show_query->table + ". 
Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY); std::stringstream stream; formatAST(*create_query, stream, false, true); diff --git a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp index dcfe76adb82..f4fd8b77036 100644 --- a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -44,10 +44,21 @@ String InterpreterShowTablesQuery::getRewrittenQuery() context.assertDatabaseExists(database, false); std::stringstream rewritten_query; - rewritten_query << "SELECT name FROM system.tables WHERE "; + rewritten_query << "SELECT name FROM system."; + + if (query.dictionaries) + rewritten_query << "dictionaries "; + else + rewritten_query << "tables "; + + rewritten_query << "WHERE "; if (query.temporary) + { + if (query.dictionaries) + throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); rewritten_query << "is_temporary"; + } else rewritten_query << "database = " << std::quoted(database, '\''); diff --git a/dbms/src/Parsers/ASTCreateQuery.cpp b/dbms/src/Parsers/ASTCreateQuery.cpp index bdade881b2c..f57965bf556 100644 --- a/dbms/src/Parsers/ASTCreateQuery.cpp +++ b/dbms/src/Parsers/ASTCreateQuery.cpp @@ -237,10 +237,10 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } else { - /// Always CREATE and always DICTIONARY - settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE DICTIONARY " << (if_not_exists ? "IF NOT EXISTS " : "") - << (settings.hilite ? hilite_none : "") << (!database.empty() ? backQuoteIfNeed(database) + "." : "") - << backQuoteIfNeed(table); + /// Always DICTIONARY + settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH " : "CREATE ") << "DICTIONARY " + << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." 
: "") << backQuoteIfNeed(table); } if (as_table_function) diff --git a/dbms/src/Parsers/ASTDictionary.cpp b/dbms/src/Parsers/ASTDictionary.cpp index 532190ae2d2..ec750acff31 100644 --- a/dbms/src/Parsers/ASTDictionary.cpp +++ b/dbms/src/Parsers/ASTDictionary.cpp @@ -24,8 +24,7 @@ void ASTDictionaryRange::formatImpl(const FormatSettings & settings, << "(" << (settings.hilite ? hilite_keyword : "") << "MIN " - << (settings.hilite ? hilite_none : "") - << min_attr_name << ", " + << min_attr_name << " " << (settings.hilite ? hilite_keyword : "") << "MAX " << (settings.hilite ? hilite_none : "") @@ -54,8 +53,7 @@ void ASTDictionaryLifetime::formatImpl(const FormatSettings & settings, << "(" << (settings.hilite ? hilite_keyword : "") << "MIN " - << (settings.hilite ? hilite_none : "") - << min_sec << ", " + << min_sec << " " << (settings.hilite ? hilite_keyword : "") << "MAX " << (settings.hilite ? hilite_none : "") @@ -133,14 +131,17 @@ void ASTDictionary::formatImpl(const FormatSettings & settings, FormatState & st if (primary_key) { settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "PRIMARY KEY " - << (settings.hilite ? hilite_none : ""); + << (settings.hilite ? hilite_none : ""); primary_key->formatImpl(settings, state, frame); } if (source) - settings.ostr << settings.nl_or_ws; - - source->formatImpl(settings, state, frame); + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SOURCE(" + << (settings.hilite ? hilite_none : ""); + source->formatImpl(settings, state, frame); + settings.ostr << ")"; + } if (lifetime) { diff --git a/dbms/src/Parsers/ASTShowTablesQuery.cpp b/dbms/src/Parsers/ASTShowTablesQuery.cpp index 4a33aeba99c..774ba2f6b53 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.cpp +++ b/dbms/src/Parsers/ASTShowTablesQuery.cpp @@ -21,7 +21,8 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format } else { - settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << "TABLES" << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << + (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); if (!from.empty()) settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") @@ -40,4 +41,3 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format } } - diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index f3500f437c3..fd0d5ff6379 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -15,6 +15,7 @@ class ASTShowTablesQuery : public ASTQueryWithOutput { public: bool databases{false}; + bool dictionaries{false}; bool temporary{false}; String from; String like; diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index ce1490d18bd..094d29628f7 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -823,6 +823,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); + ParserKeyword s_attach("ATTACH"); ParserKeyword s_dictionary("DICTIONARY"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserIdentifier name_p; @@ -840,8 +841,14 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E ASTPtr attributes; ASTPtr dictionary; + bool attach = false; if (!s_create.ignore(pos, expected)) - return false; + { + if (s_attach.ignore(pos, expected)) + attach = true; + else + return false; + } if (s_if_not_exists.ignore(pos, expected)) if_not_exists = true; @@ -859,21 +866,25 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos 
& pos, ASTPtr & node, E return false; } - if (!s_left_paren.ignore(pos, expected)) - return false; + if (!attach) + { + if (!s_left_paren.ignore(pos, expected)) + return false; - if (!attributes_p.parse(pos, attributes, expected)) - return false; + if (!attributes_p.parse(pos, attributes, expected)) + return false; - if (!s_right_paren.ignore(pos, expected)) - return false; + if (!s_right_paren.ignore(pos, expected)) + return false; - if (!dictionary_p.parse(pos, dictionary, expected)) - return false; + if (!dictionary_p.parse(pos, dictionary, expected)) + return false; + } auto query = std::make_shared(); node = query; query->is_dictionary = true; + query->attach = attach; if (database) query->database = typeid_cast(*database).name; diff --git a/dbms/src/Parsers/ParserShowTablesQuery.cpp b/dbms/src/Parsers/ParserShowTablesQuery.cpp index 3fe43c4557d..7d443da4f9f 100644 --- a/dbms/src/Parsers/ParserShowTablesQuery.cpp +++ b/dbms/src/Parsers/ParserShowTablesQuery.cpp @@ -20,6 +20,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_tables("TABLES"); ParserKeyword s_databases("DATABASES"); + ParserKeyword s_dictionaries("DICTIONARIES"); ParserKeyword s_from("FROM"); ParserKeyword s_not("NOT"); ParserKeyword s_like("LIKE"); @@ -45,33 +46,36 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_temporary.ignore(pos)) query->temporary = true; - if (s_tables.ignore(pos, expected)) + if (!s_tables.ignore(pos, expected)) { - if (s_from.ignore(pos, expected)) - { - if (!name_p.parse(pos, database, expected)) - return false; - } - - if (s_not.ignore(pos, expected)) - query->not_like = true; - - if (s_like.ignore(pos, expected)) - { - if (!like_p.parse(pos, like, expected)) - return false; - } - else if (query->not_like) + if (s_dictionaries.ignore(pos, expected)) + query->dictionaries = true; + else return false; - - if (s_limit.ignore(pos, expected)) 
- { - if (!limit_p.parse(pos, query->limit_length, expected)) - return false; - } } - else + + if (s_from.ignore(pos, expected)) + { + if (!name_p.parse(pos, database, expected)) + return false; + } + + if (s_not.ignore(pos, expected)) + query->not_like = true; + + if (s_like.ignore(pos, expected)) + { + if (!like_p.parse(pos, like, expected)) + return false; + } + else if (query->not_like) return false; + + if (s_limit.ignore(pos, expected)) + { + if (!limit_p.parse(pos, query->limit_length, expected)) + return false; + } } tryGetIdentifierNameInto(database, query->from); diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index c6f7d4ac9ae..5674c9954bf 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include #include #include @@ -16,36 +19,76 @@ namespace DB { +namespace +{ + +NameSet getFilteredDatabases(const ASTPtr & query, const Context & context) +{ + MutableColumnPtr column = ColumnString::create(); + for (const auto & db : context.getDatabases()) + column->insert(db.first); + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithQuery(query, block, context); + NameSet result; + for (size_t i = 0; i < block.rows(); ++i) + result.insert(block.getByPosition(0).column->getDataAt(i).toString()); + return result; +} + + +NameSet getFilteredDictionaries(const ASTPtr & query, const Context & context, const DatabasePtr & database) +{ + MutableColumnPtr column = ColumnString::create(); + auto dicts_it = database->getDictionariesIterator(context); + while (dicts_it->isValid()) + { + column->insert(dicts_it->name()); + dicts_it->next(); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "dictionary")}; + VirtualColumnUtils::filterBlockWithQuery(query, 
block, context); + NameSet result; + for (size_t i = 0; i < block.rows(); ++i) + result.insert(block.getByPosition(0).column->getDataAt(i).toString()); + return result; +} + +} + NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() { return { - { "name", std::make_shared() }, - { "status", std::make_shared(ExternalLoader::getStatusEnumAllPossibleValues()) }, - { "origin", std::make_shared() }, - { "type", std::make_shared() }, - { "key", std::make_shared() }, - { "attribute.names", std::make_shared(std::make_shared()) }, - { "attribute.types", std::make_shared(std::make_shared()) }, - { "bytes_allocated", std::make_shared() }, - { "query_count", std::make_shared() }, - { "hit_rate", std::make_shared() }, - { "element_count", std::make_shared() }, - { "load_factor", std::make_shared() }, - { "source", std::make_shared() }, - { "loading_start_time", std::make_shared() }, - { "loading_duration", std::make_shared() }, + {"database", std::make_shared()}, + {"name", std::make_shared()}, + {"status", std::make_shared(ExternalLoader::getStatusEnumAllPossibleValues())}, + {"origin", std::make_shared()}, + {"type", std::make_shared()}, + {"key", std::make_shared()}, + {"attribute.names", std::make_shared(std::make_shared())}, + {"attribute.types", std::make_shared(std::make_shared())}, + {"bytes_allocated", std::make_shared()}, + {"query_count", std::make_shared()}, + {"hit_rate", std::make_shared()}, + {"element_count", std::make_shared()}, + {"load_factor", std::make_shared()}, + {"source", std::make_shared()}, + {"loading_start_time", std::make_shared()}, + {"loading_duration", std::make_shared()}, //{ "creation_time", std::make_shared() }, - { "last_exception", std::make_shared() }, + {"last_exception", std::make_shared()}, }; } -void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Context & context, const 
SelectQueryInfo & query_info) const { const auto & external_dictionaries = context.getExternalDictionariesLoader(); for (const auto & [dict_name, load_result] : external_dictionaries.getCurrentLoadResults()) { size_t i = 0; + res_columns[i++]->insert(""); res_columns[i++]->insert(dict_name); res_columns[i++]->insert(static_cast(load_result.status)); res_columns[i++]->insert(load_result.origin); @@ -85,6 +128,27 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con else res_columns[i++]->insertDefault(); } + + /// Temporary code for testing TODO(alesapin) + NameSet databases = getFilteredDatabases(query_info.query, context); + for (auto database : databases) + { + DatabasePtr database_ptr = context.getDatabase(database); + auto dictionaries_set = getFilteredDictionaries(query_info.query, context, database_ptr); + auto filter = [&dictionaries_set](const String & dict_name) { return dictionaries_set.count(dict_name); }; + auto dictionaries_it = database_ptr->getDictionariesIterator(context, filter); + while (dictionaries_it->isValid()) + { + size_t i = 0; + res_columns[i++]->insert(database); + res_columns[i++]->insert(dictionaries_it->name()); + for (size_t j = 0; j < getNamesAndTypes().size() - 2; ++j) + res_columns[i++]->insertDefault(); + + dictionaries_it->next(); + } + + } } } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 9ac3245a752..d73c72a927d 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -86,7 +86,11 @@ public: UInt64 max_block_size_, ColumnPtr databases_, const Context & context_) - : columns_mask(std::move(columns_mask_)), header(std::move(header_)), max_block_size(max_block_size_), databases(std::move(databases_)), context(context_) {} + : columns_mask(std::move(columns_mask_)) + , header(std::move(header_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + 
, context(context_) {} String getName() const override { return "Tables"; } Block getHeader() const override { return header; } From dca0798d4d5b954a8402731a3324fb7333fb6876 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 16:51:31 +0300 Subject: [PATCH 007/122] Compilable code --- dbms/src/Databases/DatabaseLazy.cpp | 2 -- dbms/src/Databases/DatabaseLazy.h | 4 +-- dbms/src/Databases/DatabaseMySQL.cpp | 2 +- dbms/src/Databases/DatabaseMySQL.h | 41 +++++++++++++++++++++++++++- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 89b8b51fd85..7a5dd08c9c5 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -29,8 +29,6 @@ namespace ErrorCodes } -static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; - DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, const Context & context) : name(name_) diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index 16a2b859519..a8ef26c8c02 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -75,11 +75,11 @@ public: ASTPtr getCreateDictionaryQuery( const Context & context, - const String & table_name) const override; + const String & dictionary_name) const override; ASTPtr tryGetCreateDictionaryQuery( const Context & context, - const String & table_name) const override; + const String & dictionary_name) const override; ASTPtr getCreateDatabaseQuery(const Context & context) const override; diff --git a/dbms/src/Databases/DatabaseMySQL.cpp b/dbms/src/Databases/DatabaseMySQL.cpp index 491dfba1bcf..8e0e092b1d8 100644 --- a/dbms/src/Databases/DatabaseMySQL.cpp +++ b/dbms/src/Databases/DatabaseMySQL.cpp @@ -107,7 +107,7 @@ ASTPtr DatabaseMySQL::tryGetCreateTableQuery(const Context &, const String & tab return local_tables_cache[table_name].create_table_query; } -time_t 
DatabaseMySQL::getTableMetadataModificationTime(const Context &, const String & table_name) +time_t DatabaseMySQL::getObjectMetadataModificationTime(const Context &, const String & table_name) { std::lock_guard lock(mutex); diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 89500c84c21..7419cad3306 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -31,15 +31,32 @@ public: DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; + DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context &, const FilterByNameFunction & = {}) override + { + return nullptr; + } + ASTPtr getCreateDatabaseQuery(const Context & context) const override; bool isTableExist(const Context & context, const String & name) const override; + bool isDictionaryExist(const Context &, const String &) const override { return false; } + StoragePtr tryGetTable(const Context & context, const String & name) const override; + DictionaryPtr tryGetDictionary(const Context &, const String &) const override { return {}; } + ASTPtr tryGetCreateTableQuery(const Context & context, const String & name) const override; - time_t getTableMetadataModificationTime(const Context & context, const String & name) override; + ASTPtr getCreateDictionaryQuery(const Context &, const String &) const override + { + throw Exception("MySQL database engine does not support dictionaries.", ErrorCodes::NOT_IMPLEMENTED); + } + + ASTPtr tryGetCreateDictionaryQuery(const Context &, const String &) const override { return nullptr; } + + + time_t getObjectMetadataModificationTime(const Context & context, const String & name) override; void shutdown() override; @@ -48,6 +65,11 @@ public: throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED); } + DictionaryPtr detachDictionary(const String &) override + { + throw 
Exception("MySQL database engine does not support detach dictionary.", ErrorCodes::NOT_IMPLEMENTED); + } + void loadStoredObjects(Context &, bool) override { /// do nothing @@ -58,16 +80,33 @@ public: throw Exception("MySQL database engine does not support remove table.", ErrorCodes::NOT_IMPLEMENTED); } + void removeDictionary(const Context &, const String &) override + { + throw Exception("MySQL database engine does not support remove dictionary.", ErrorCodes::NOT_IMPLEMENTED); + } + + void attachTable(const String &, const StoragePtr &) override { throw Exception("MySQL database engine does not support attach table.", ErrorCodes::NOT_IMPLEMENTED); } + void attachDictionary(const String &, const DictionaryPtr &) override + { + throw Exception("MySQL database engine does not support attach dictionary.", ErrorCodes::NOT_IMPLEMENTED); + } + void createTable(const Context &, const String &, const StoragePtr &, const ASTPtr &) override { throw Exception("MySQL database engine does not support create table.", ErrorCodes::NOT_IMPLEMENTED); } + void createDictionary(const Context &, const String &, const DictionaryPtr &, const ASTPtr &) override + { + throw Exception("MySQL database engine does not support create dictionary.", ErrorCodes::NOT_IMPLEMENTED); + } + + private: struct MySQLStorageInfo { From 967fe647939464608ce2dcf9d224b23a25e73d4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 18:22:30 +0300 Subject: [PATCH 008/122] Add simpliest tests --- dbms/src/Databases/DatabaseOnDisk.cpp | 8 +- .../src/Interpreters/InterpreterDropQuery.cpp | 6 + .../System/StorageSystemDictionaries.cpp | 4 +- .../01018_ddl_dictionaries_create.reference | 25 ++++ .../01018_ddl_dictionaries_create.sql | 114 ++++++++++++++++++ 5 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql diff --git 
a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 5b55f9a402c..f77d0476e88 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -115,8 +115,8 @@ ASTPtr parseCreateQueryFromMetadataFile(const String & filepath, Poco::Logger * return nullptr; } - ParserCreateQuery parser_table; - ASTPtr result = parseQuery(parser_table, definition, "in file " + filepath, 0); + ParserCreateQuery parser_create; + ASTPtr result = parseQuery(parser_create, definition, "in file " + filepath, 0); return result; } @@ -165,8 +165,10 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) ASTPtr query_clone = query->clone(); auto & create = query_clone->as(); + if (!create.is_dictionary) + create.attach = true; + /// We remove everything that is not needed for ATTACH from the query. - create.attach = true; create.database.clear(); create.as_database.clear(); create.as_table.clear(); diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 15799e5df26..3a414ea5121 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -247,6 +247,12 @@ BlockIO InterpreterDropQuery::executeToDatabase(String & database_name, ASTDropQ executeToTable(database_name, current_table_name, kind, false, false, false); } + for (auto iterator = database->getDictionariesIterator(context); iterator->isValid(); iterator->next()) + { + String current_dictionary = iterator->name(); + executeToDictionary(database_name, current_dictionary, kind, false, false, false); + } + auto context_lock = context.getLock(); /// Someone could have time to delete the database before us. 
diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 5674c9954bf..6846b915f09 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -41,7 +41,7 @@ NameSet getFilteredDictionaries(const ASTPtr & query, const Context & context, c { MutableColumnPtr column = ColumnString::create(); auto dicts_it = database->getDictionariesIterator(context); - while (dicts_it->isValid()) + while (dicts_it && dicts_it->isValid()) { column->insert(dicts_it->name()); dicts_it->next(); @@ -137,7 +137,7 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con auto dictionaries_set = getFilteredDictionaries(query_info.query, context, database_ptr); auto filter = [&dictionaries_set](const String & dict_name) { return dictionaries_set.count(dict_name); }; auto dictionaries_it = database_ptr->getDictionariesIterator(context, filter); - while (dictionaries_it->isValid()) + while (dictionaries_it && dictionaries_it->isValid()) { size_t i = 0; res_columns[i++]->insert(database); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference new file mode 100644 index 00000000000..c3c7827aef1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -0,0 +1,25 @@ +=DICTIONARY in Ordinary DB +CREATE DICTIONARY ordinary_db.dict1 (`key_column` UInt64 DEFAULT 0, `second_column` UInt8 DEFAULT 1, `third_column` String DEFAULT \'qqq\') PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) +dict1 +1 +ordinary_db dict1 +==DETACH DICTIONARY +0 +==ATTACH DICTIONARY +dict1 +1 +ordinary_db dict1 +==DROP DICTIONARY +0 +=DICTIONARY in Memory DB +dict2 +1 +memory_db dict2 +==DETACH DICTIONARY +0 
+==ATTACH DICTIONARY +dict2 +1 +memory_db dict2 +==DROP DICTIONARY +0 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql new file mode 100644 index 00000000000..7ed85041f5f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -0,0 +1,114 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS ordinary_db; + +CREATE DATABASE ordinary_db ENGINE = Ordinary; + +SELECT '=DICTIONARY in Ordinary DB'; + +DROP DICTIONARY IF EXISTS ordinary_db.dict1; + +CREATE DICTIONARY ordinary_db.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SHOW CREATE DICTIONARY ordinary_db.dict1; + +SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1'; + +EXISTS DICTIONARY ordinary_db.dict1; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict1'; + +SELECT '==DETACH DICTIONARY'; +DETACH DICTIONARY ordinary_db.dict1; + +SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1'; + +EXISTS DICTIONARY ordinary_db.dict1; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict1'; + +SELECT '==ATTACH DICTIONARY'; +ATTACH DICTIONARY ordinary_db.dict1; + +SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1'; + +EXISTS DICTIONARY ordinary_db.dict1; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict1'; + +SELECT '==DROP DICTIONARY'; + +DROP DICTIONARY IF EXISTS ordinary_db.dict1; + +SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1'; + +EXISTS DICTIONARY ordinary_db.dict1; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict1'; + +DROP DATABASE IF EXISTS ordinary_db; + +DROP DATABASE IF EXISTS memory_db; + +CREATE DATABASE memory_db ENGINE = Memory; + +SELECT '=DICTIONARY in Memory DB'; + +CREATE 
DICTIONARY memory_db.dict2 +( + key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, + second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SHOW CREATE DICTIONARY memory_db.dict2; -- {serverError 500} + +SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; + +EXISTS DICTIONARY memory_db.dict2; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; + +SELECT '==DETACH DICTIONARY'; +DETACH DICTIONARY memory_db.dict2; + +SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; + +EXISTS DICTIONARY memory_db.dict2; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; + +SELECT '==ATTACH DICTIONARY'; + +ATTACH DICTIONARY memory_db.dict2; + +SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; + +EXISTS DICTIONARY memory_db.dict2; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; + +SELECT '==DROP DICTIONARY'; + +DROP DICTIONARY IF EXISTS memory_db.dict2; + +SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; + +EXISTS DICTIONARY memory_db.dict2; + +SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; + +DROP DATABASE IF EXISTS memory_db; From 653aa7c0054b3b396c4fde3f5b725ce0d76cf66e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Oct 2019 18:41:52 +0300 Subject: [PATCH 009/122] Fix unit tests and segfault --- dbms/src/Databases/DatabaseDictionary.cpp | 2 +- dbms/src/Databases/DatabaseLazy.cpp | 2 +- dbms/src/Databases/IDatabase.h | 3 ++- .../Parsers/tests/gtest_dictionary_parser.cpp | 2 +- .../System/StorageSystemDictionaries.cpp | 4 ++-- .../01018_ddl_dictionaries_create.sql | 17 +++++++++++++++++ 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 403725a7234..f76a3f0e67e 100644 --- 
a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -85,7 +85,7 @@ DatabaseDictionariesIteratorPtr DatabaseDictionary::getDictionariesIterator( const Context & /*context*/, const FilterByNameFunction & /*filter_by_dictionary_name*/) { - return nullptr; + return std::make_unique(); } diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 7a5dd08c9c5..75fd2ff2996 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -123,7 +123,7 @@ DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( const Context & /*context*/, const FilterByNameFunction & /*filter_by_dictionary_name*/) { - throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); + return std::make_unique(); } void DatabaseLazy::attachDictionary( diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index edddc75fbe9..690518727c3 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -67,13 +67,14 @@ private: Dictionaries::iterator it; public: + DatabaseDictionariesSnapshotIterator() = default; DatabaseDictionariesSnapshotIterator(Dictionaries & dictionaries_) : dictionaries(dictionaries_), it(dictionaries.begin()) {} DatabaseDictionariesSnapshotIterator(Dictionaries && dictionaries_) : dictionaries(dictionaries_), it(dictionaries.begin()) {} void next() { ++it; } - bool isValid() const { return it != dictionaries.end(); } + bool isValid() const { return !dictionaries.empty() && it != dictionaries.end(); } const String & name() const { return it->first; } diff --git a/dbms/src/Parsers/tests/gtest_dictionary_parser.cpp b/dbms/src/Parsers/tests/gtest_dictionary_parser.cpp index 6d3c964bc45..934eb10f9a6 100644 --- a/dbms/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/dbms/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -291,7 +291,7 @@ TEST(ParserDictionaryDDL, Formatting) ASTPtr ast 
= parseQuery(parser, input.data(), input.data() + input.size(), "", 0); ASTCreateQuery * create = ast->as(); auto str = serializeAST(*create, true); - EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '') LIFETIME(MIN 1, MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column, MAX third_column)"); + EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); } TEST(ParserDictionaryDDL, ParseDropQuery) diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 6846b915f09..5674c9954bf 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -41,7 +41,7 @@ NameSet getFilteredDictionaries(const ASTPtr & query, const Context & context, c { MutableColumnPtr column = ColumnString::create(); auto dicts_it = database->getDictionariesIterator(context); - while (dicts_it && dicts_it->isValid()) + while (dicts_it->isValid()) { column->insert(dicts_it->name()); dicts_it->next(); @@ -137,7 +137,7 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con auto dictionaries_set = getFilteredDictionaries(query_info.query, context, database_ptr); auto filter = [&dictionaries_set](const String & dict_name) { return 
dictionaries_set.count(dict_name); }; auto dictionaries_it = database_ptr->getDictionariesIterator(context, filter); - while (dictionaries_it && dictionaries_it->isValid()) + while (dictionaries_it->isValid()) { size_t i = 0; res_columns[i++]->insert(database); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 7ed85041f5f..4d9f46ee3e6 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -112,3 +112,20 @@ EXISTS DICTIONARY memory_db.dict2; SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; DROP DATABASE IF EXISTS memory_db; + +DROP DATABASE IF EXISTS dictionary_db; + +CREATE DATABASE dictionary_db ENGINE = Dictionary; + +CREATE DICTIONARY dictionary_db.dict2 +( + key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, + second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column, second_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); -- {serverError 1} + +DROP DATABASE IF EXISTS dictionary_db; From 1174fc346bba33285a8be96e50d8c947edac46c5 Mon Sep 17 00:00:00 2001 From: VDimir Date: Sat, 12 Oct 2019 11:42:43 +0000 Subject: [PATCH 010/122] Add isValidJSON function --- dbms/src/Functions/FunctionsJSON.cpp | 1 + dbms/src/Functions/FunctionsJSON.h | 23 ++++++++++++++++--- .../00918_json_functions.reference | 4 ++++ .../0_stateless/00918_json_functions.sql | 5 ++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/FunctionsJSON.cpp b/dbms/src/Functions/FunctionsJSON.cpp index 62413ce58d4..eedbecf852d 100644 --- a/dbms/src/Functions/FunctionsJSON.cpp +++ b/dbms/src/Functions/FunctionsJSON.cpp @@ -8,6 +8,7 @@ namespace DB void registerFunctionsJSON(FunctionFactory & factory) { 
factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index b9fddf57d39..ee556f480d7 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -279,6 +279,7 @@ private: struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; struct NameJSONType { static constexpr auto name{"JSONType"}; }; @@ -292,11 +293,23 @@ struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKey struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -template -class JSONHasImpl +template +class JSONCheckImpl { public: - static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) { + if constexpr (!support_key_lookup) { + if (arguments.size() != 1) + throw Exception{"Function " + String(function_name) + " needs exactly one argument", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + } + else + { + UNUSED(function_name); + UNUSED(arguments); + } + return std::make_shared(); + } using Iterator = typename JSONParser::Iterator; static bool addValueToColumn(IColumn & dest, const Iterator &) @@ -310,6 +323,10 @@ public: static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {} }; +template +using JSONHasImpl = JSONCheckImpl; +template +using isValidJSONImpl = JSONCheckImpl; template class JSONLengthImpl diff --git a/dbms/tests/queries/0_stateless/00918_json_functions.reference b/dbms/tests/queries/0_stateless/00918_json_functions.reference index a23b177d468..181da3dd3a0 
100644 --- a/dbms/tests/queries/0_stateless/00918_json_functions.reference +++ b/dbms/tests/queries/0_stateless/00918_json_functions.reference @@ -7,6 +7,10 @@ 1 1 0 +--isValidJSON-- +1 +0 +0 --JSONKey-- a b diff --git a/dbms/tests/queries/0_stateless/00918_json_functions.sql b/dbms/tests/queries/0_stateless/00918_json_functions.sql index 38bf0a7ffec..4cb2445ca2a 100644 --- a/dbms/tests/queries/0_stateless/00918_json_functions.sql +++ b/dbms/tests/queries/0_stateless/00918_json_functions.sql @@ -11,6 +11,11 @@ SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'a'); SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b'); SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'c'); +SELECT '--isValidJSON--'; +SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}'); +SELECT isValidJSON('not a json'); +SELECT isValidJSON('"HX-='); + SELECT '--JSONKey--'; SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', 1); SELECT JSONKey('{"a": "hello", "b": [-100, 200.0, 300]}', 2); From 9a9a234410fdde007f2c9256e7cd2bde6d8d6aec Mon Sep 17 00:00:00 2001 From: VDimir Date: Sat, 12 Oct 2019 12:00:46 +0000 Subject: [PATCH 011/122] Add doc for isValidJSON --- docs/en/query_language/functions/json_functions.md | 11 +++++++++++ docs/ru/query_language/functions/json_functions.md | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/docs/en/query_language/functions/json_functions.md b/docs/en/query_language/functions/json_functions.md index 4de06ae53fb..6ab942bd012 100644 --- a/docs/en/query_language/functions/json_functions.md +++ b/docs/en/query_language/functions/json_functions.md @@ -57,6 +57,17 @@ There is currently no support for code points in the format `\uXXXX\uYYYY` that The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. The assumption 2 mentioned above still applies. +## isValidJSON(json) + +Checks that passed string is a valid json. 
+ +Examples: + +```sql +SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 +SELECT isValidJSON('not a json') = 0 +``` + ## JSONHas(json[, indices_or_keys]...) If the value exists in the JSON document, `1` will be returned. diff --git a/docs/ru/query_language/functions/json_functions.md b/docs/ru/query_language/functions/json_functions.md index d1de97ef10e..49f575f4b78 100644 --- a/docs/ru/query_language/functions/json_functions.md +++ b/docs/ru/query_language/functions/json_functions.md @@ -57,6 +57,17 @@ visitParamExtractString('{"abc":"hello}', 'abc') = '' Следующие функции используют [simdjson](https://github.com/lemire/simdjson) который разработан по более сложны требования для разбора JSON. Упомянутое выше предположение 2 по-прежнему применимо. +## isValidJSON(json) + +Проверяет, является ли переданная строка валидным json значением. + +Примеры: + +```sql +SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 +SELECT isValidJSON('not a json') = 0 +``` + ## JSONHas(json[, indices_or_keys]...) Если значение существует в документе JSON, то возвращается `1`. 
From 62c7c50aba8c3e5eccfe44423ca14cf58a4662db Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 12 Oct 2019 20:00:32 +0300 Subject: [PATCH 012/122] Update FunctionsJSON.h --- dbms/src/Functions/FunctionsJSON.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index ee556f480d7..03383b59298 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -297,8 +297,10 @@ template class JSONCheckImpl { public: - static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) { - if constexpr (!support_key_lookup) { + static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if constexpr (!support_key_lookup) + { if (arguments.size() != 1) throw Exception{"Function " + String(function_name) + " needs exactly one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; From 67ea66604fb720f848b2e1bce1ac2a47c21f85c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 14 Oct 2019 12:21:29 +0300 Subject: [PATCH 013/122] Fix segmentation fault in database factory --- dbms/src/Databases/DatabaseFactory.cpp | 12 +++++++--- .../01018_ddl_dictionaries_create.reference | 2 ++ .../01018_ddl_dictionaries_create.sql | 22 +++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/dbms/src/Databases/DatabaseFactory.cpp b/dbms/src/Databases/DatabaseFactory.cpp index fe81bd3d495..5b6456fc8b8 100644 --- a/dbms/src/Databases/DatabaseFactory.cpp +++ b/dbms/src/Databases/DatabaseFactory.cpp @@ -53,7 +53,10 @@ DatabasePtr DatabaseFactory::get( else if (engine_name == "MySQL") { const ASTFunction * engine = engine_define->engine; - const auto & arguments = engine->arguments->children; + + std::vector arguments; + if (engine->arguments) + arguments = engine->arguments->children; if (arguments.size() != 4) throw Exception("MySQL Database require mysql_hostname, 
mysql_database_name, mysql_username, mysql_password arguments.", @@ -74,10 +77,13 @@ DatabasePtr DatabaseFactory::get( else if (engine_name == "Lazy") { const ASTFunction * engine = engine_define->engine; - const auto & arguments = engine->arguments->children; + + std::vector arguments; + if (engine->arguments) + arguments = engine->arguments->children; if (arguments.size() != 1) - throw Exception("Lazy database require cache_expiration_time_seconds argument.", + throw Exception("Lazy database require cache_expiration_time_seconds argument", ErrorCodes::BAD_ARGUMENTS); const auto cache_expiration_time_seconds = arguments[0]->as()->value.safeGet(); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index c3c7827aef1..2b0f92c79d7 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -23,3 +23,5 @@ dict2 memory_db dict2 ==DROP DICTIONARY 0 +=DICTIONARY in Dictionary DB +=DICTIONARY in Lazy DB diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 4d9f46ee3e6..56d232587fc 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -117,6 +117,8 @@ DROP DATABASE IF EXISTS dictionary_db; CREATE DATABASE dictionary_db ENGINE = Dictionary; +SELECT '=DICTIONARY in Dictionary DB'; + CREATE DICTIONARY dictionary_db.dict2 ( key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, @@ -129,3 +131,23 @@ LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -- {serverError 1} DROP DATABASE IF EXISTS dictionary_db; + +SELECT '=DICTIONARY in Lazy DB'; + +DROP DATABASE IF EXISTS lazy_db; + +CREATE DATABASE lazy_db ENGINE = Lazy(1); + +CREATE DICTIONARY lazy_db.dict3 +( + key_column UInt64 DEFAULT 0 
INJECTIVE HIERARCHICAL, + second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column, second_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); -- {serverError 1} + +DROP DATABASE IF EXISTS lazy_db; + From ecde224692b322760721ff84a7d41a14cc95b9db Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 14 Oct 2019 12:52:43 +0300 Subject: [PATCH 014/122] More tests --- dbms/src/Databases/DatabasesCommon.cpp | 1 + .../src/Interpreters/InterpreterDropQuery.cpp | 1 - .../01018_ddl_dictionaries_create.reference | 3 ++ .../01018_ddl_dictionaries_create.sql | 40 +++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index db625ac62af..1ea751aa085 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -159,6 +159,7 @@ void DatabaseWithOwnTablesBase::shutdown() std::lock_guard lock(mutex); tables.clear(); + dictionaries.clear(); } DatabaseWithOwnTablesBase::~DatabaseWithOwnTablesBase() diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 8982bc14467..3b8dd382ac7 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -108,7 +108,6 @@ BlockIO InterpreterDropQuery::executeToTable( database_and_table.first->getMetadataPath() + escapeForFileName(database_and_table.second->getTableName()); - /// NOTE seems like duplication of the code inside removeTable method of database const auto prev_metadata_name = metadata_file_without_extension + ".sql"; const auto drop_metadata_name = metadata_file_without_extension + ".sql.tmp_drop"; diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference 
b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index 2b0f92c79d7..53d2a29fce2 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -25,3 +25,6 @@ memory_db dict2 0 =DICTIONARY in Dictionary DB =DICTIONARY in Lazy DB +=DROP DATABASE WITH DICTIONARY +dict4 +dict4 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 56d232587fc..0443931d46d 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -151,3 +151,43 @@ LAYOUT(FLAT()); -- {serverError 1} DROP DATABASE IF EXISTS lazy_db; + +SELECT '=DROP DATABASE WITH DICTIONARY'; + +DROP DATABASE IF EXISTS ordinary_db; + +CREATE DATABASE ordinary_db ENGINE = Ordinary; + +CREATE DICTIONARY ordinary_db.dict4 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SHOW DICTIONARIES FROM ordinary_db; + +DROP DATABASE IF EXISTS ordinary_db; + +CREATE DATABASE ordinary_db ENGINE = Ordinary; + +SHOW DICTIONARIES FROM ordinary_db; + +CREATE DICTIONARY ordinary_db.dict4 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SHOW DICTIONARIES FROM ordinary_db; + +DROP DATABASE IF EXISTS ordinary_db; From 1ce0eb6e637aa70568afa38f03dbf6dd33a235f1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 14 Oct 2019 13:04:10 +0300 Subject: [PATCH 015/122] Appropriate error codes numbers --- 
dbms/src/Common/ErrorCodes.cpp | 7 ++++--- dbms/src/Databases/DatabaseMemory.cpp | 1 - dbms/src/Databases/DatabaseOrdinary.cpp | 3 ++- dbms/src/Databases/DatabasesCommon.cpp | 3 ++- dbms/src/Dictionaries/DictionaryFactory.cpp | 1 + dbms/src/Dictionaries/DictionaryFactory.h | 1 + .../queries/0_stateless/01018_ddl_dictionaries_create.sql | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 608e2672dcc..1dbca428ab2 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -459,9 +459,10 @@ namespace ErrorCodes extern const int DICTIONARY_ACCESS_DENIED = 482; extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; - extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 500; - extern const int DICTIONARY_ALREADY_EXISTS = 501; - extern const int UNKNOWN_DICTIONARY = 502; + extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 485; + extern const int DICTIONARY_ALREADY_EXISTS = 486; + extern const int UNKNOWN_DICTIONARY = 487; + extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 488; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index a3cb5f65010..0fd4d04ec12 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -10,7 +10,6 @@ namespace ErrorCodes { extern const int CANNOT_GET_CREATE_TABLE_QUERY; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY; - extern const int NOT_IMPLEMENTED; } DatabaseMemory::DatabaseMemory(String name_) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 75c2db29495..98243f4964e 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -41,6 +41,7 @@ namespace ErrorCodes extern const int CANNOT_CREATE_DICTIONARY_FROM_METADATA; extern const int 
EMPTY_LIST_OF_COLUMNS_PASSED; extern const int CANNOT_PARSE_TEXT; + extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED; } @@ -60,7 +61,7 @@ std::pair createDictionaryFromAST( ast_create_query.database = database_name; if (!ast_create_query.dictionary_attributes_list) - throw Exception("Missing definition of dictionary attributes.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + throw Exception("Missing definition of dictionary attributes.", ErrorCodes::EMPTY_LIST_OF_ATTRIBUTES_PASSED); return { diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 1ea751aa085..d7485e0b91b 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int TABLE_ALREADY_EXISTS; extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; + extern const int DICTIONARY_ALREADY_EXISTS; } bool DatabaseWithOwnTablesBase::isTableExist( @@ -138,7 +139,7 @@ void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, { std::lock_guard lock(mutex); if (!dictionaries.emplace(dictionary_name, dictionary).second) - throw Exception("Dictionary " + name + "." + dictionary_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception("Dictionary " + name + "." 
+ dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index d22ea41fd84..a9d0a55ea22 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -52,6 +52,7 @@ DictionaryPtr DictionaryFactory::create( DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const { + /// Temporary code for testing TODO(alesapin) (void)(name); (void)(ast); (void)(context); diff --git a/dbms/src/Dictionaries/DictionaryFactory.h b/dbms/src/Dictionaries/DictionaryFactory.h index fd7978f590f..df769c57645 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.h +++ b/dbms/src/Dictionaries/DictionaryFactory.h @@ -34,6 +34,7 @@ public: const std::string & config_prefix, const Context & context) const; + /// Temporary method for testing TODO(alesapin) DictionaryPtr create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const; diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 0443931d46d..84f5f08a418 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -74,7 +74,7 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dic LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); -SHOW CREATE DICTIONARY memory_db.dict2; -- {serverError 500} +SHOW CREATE DICTIONARY memory_db.dict2; -- {serverError 485} SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; From f850408ae48fb80c300d6883cb6e5fc014f879d9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 15 Oct 2019 15:16:19 +0300 Subject: [PATCH 016/122] Convert AST to Abstract configuration --- dbms/src/Common/ErrorCodes.cpp | 1 + 
.../getDictionaryConfigurationFromAST.cpp | 423 ++++++++++++++++++ .../tests/gtest_dictionary_configuration.cpp | 198 ++++++++ 3 files changed, 622 insertions(+) create mode 100644 dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp create mode 100644 dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 1dbca428ab2..b8a0737c4b5 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -463,6 +463,7 @@ namespace ErrorCodes extern const int DICTIONARY_ALREADY_EXISTS = 486; extern const int UNKNOWN_DICTIONARY = 487; extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 488; + extern const int INCORRECT_DICTIONARY_DEFINITION = 489; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp new file mode 100644 index 00000000000..a0afd91d2fa --- /dev/null +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int INCORRECT_DICTIONARY_DEFINITION; +} + +/// There are a lot of code, but it's very simple and straightforward +/// We just convert +namespace +{ + +String unescapeString(const String & string) +{ + if (!string.empty() && string.front() == '\'' && string.back() == '\'') + return string.substr(1, string.size() - 2); + return string; +} + + +using namespace Poco; +using namespace Poco::XML; +/* + * Transforms next definition + * LIFETIME(MIN 10, MAX 100) + * to the next configuration + * + * 10 + * 100 + * + */ +void buildLifetimeConfiguration( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryLifetime * lifetime) +{ + + AutoPtr 
lifetime_element(doc->createElement("lifetime")); + AutoPtr min_element(doc->createElement("min")); + AutoPtr max_element(doc->createElement("max")); + AutoPtr min_sec(doc->createTextNode(toString(lifetime->min_sec))); + min_element->appendChild(min_sec); + AutoPtr max_sec(doc->createTextNode(toString(lifetime->max_sec))); + max_element->appendChild(max_sec); + lifetime_element->appendChild(min_element); + lifetime_element->appendChild(max_element); + root->appendChild(lifetime_element); +} + +/* + * Transforms next definition + * LAYOUT(FLAT()) + * to the next configuration + * + * + * + * + * And next definition + * LAYOUT(CACHE(SIZE_IN_CELLS 1000)) + * to the next one + * + * + * 1000 + * + * + */ +void buildLayoutConfiguration( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryLayout * layout) +{ + AutoPtr layout_element(doc->createElement("layout")); + root->appendChild(layout_element); + AutoPtr layout_type_element(doc->createElement(layout->layout_type)); + layout_element->appendChild(layout_type_element); + if (layout->parameter.has_value()) + { + const auto & param = layout->parameter; + AutoPtr layout_type_parameter_element(doc->createElement(param->first)); + const ASTLiteral & literal = param->second->as(); + AutoPtr value(doc->createTextNode(toString(literal.value.get()))); + layout_type_parameter_element->appendChild(value); + layout_type_element->appendChild(layout_type_parameter_element); + } +} + +/* + * Transforms next definition + * RANGE(MIN StartDate, MAX EndDate) + * to the next configuration + * StartDate + * EndDate + */ +void buildRangeConfiguration(AutoPtr doc, AutoPtr root, const ASTDictionaryRange * range) +{ + // appends value to root + auto appendElem = [&doc, &root](const std::string & key, const std::string & value) { + AutoPtr element(doc->createElement(key)); + AutoPtr name(doc->createElement("name")); + AutoPtr text(doc->createTextNode(value)); + name->appendChild(text); + element->appendChild(name); + 
root->appendChild(element); + }; + + appendElem("range_min", range->min_attr_name); + appendElem("range_max", range->max_attr_name); +} + + +/// Get primary key columns names from AST +Names getPrimaryKeyColumns(const ASTExpressionList * primary_key) +{ + Names result; + const auto & children = primary_key->children; + + for (size_t index = 0; index != children.size(); ++index) + { + const ASTIdentifier * key_part = children[index]->as(); + result.push_back(key_part->name); + } + return result; +} + +/** + * Transofrms single dictionary attribute to configuration + * third_column UInt8 DEFAULT 2 EXPRESSION rand() % 100 * 77 + * to + * + * third_column + * UInt8 + * 2 + * (rand() % 100) * 77 + * + */ +void buildSingleAttribute( + AutoPtr doc, + AutoPtr root, + const ASTDictionaryAttributeDeclaration * dict_attr) +{ + AutoPtr attribute_element(doc->createElement("attribute")); + root->appendChild(attribute_element); + + AutoPtr name_element(doc->createElement("name")); + AutoPtr name(doc->createTextNode(dict_attr->name)); + name_element->appendChild(name); + attribute_element->appendChild(name_element); + + AutoPtr type_element(doc->createElement("type")); + AutoPtr type(doc->createTextNode(queryToString(dict_attr->type))); + type_element->appendChild(type); + attribute_element->appendChild(type_element); + + AutoPtr null_value_element(doc->createElement("null_value")); + String null_value_str; + if (dict_attr->default_value) + null_value_str = queryToString(dict_attr->default_value); + AutoPtr null_value(doc->createTextNode(null_value_str)); + null_value_element->appendChild(null_value); + attribute_element->appendChild(null_value_element); + + if (dict_attr->expression != nullptr) + { + AutoPtr expression_element(doc->createElement("expression")); + AutoPtr expression(doc->createTextNode(queryToString(dict_attr->expression))); + expression_element->appendChild(expression); + attribute_element->appendChild(expression_element); + } + + if (dict_attr->hierarchical) + 
{ + AutoPtr hierarchical_element(doc->createElement("hierarchical")); + AutoPtr hierarchical(doc->createTextNode("true")); + hierarchical_element->appendChild(hierarchical); + attribute_element->appendChild(hierarchical_element); + } + + if (dict_attr->injective) + { + AutoPtr injective_element(doc->createElement("injective")); + AutoPtr injective(doc->createTextNode("true")); + injective_element->appendChild(injective); + attribute_element->appendChild(injective_element); + } + + if (dict_attr->is_object_id) + { + AutoPtr is_object_id_element(doc->createElement("is_object_id")); + AutoPtr is_object_id(doc->createTextNode("true")); + is_object_id_element->appendChild(is_object_id); + attribute_element->appendChild(is_object_id_element); + } +} + + +/** + * Transforms + * PRIMARY KEY Attr1 ,..., AttrN + * to the next configuration + * Attr1 + * or + * + * + * Attr1 + * UInt8 + * + * ... + * fe + * + * + */ +void buildPrimaryKeyConfiguration( + AutoPtr doc, + AutoPtr root, + bool complex, + const Names & key_names, + const ASTExpressionList * dictionary_attributes) +{ + if (!complex) + { + if (key_names.size() != 1) + throw Exception("Primary key for simple dictionary must contain exactly one element", + ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + AutoPtr id_element(doc->createElement("id")); + root->appendChild(id_element); + AutoPtr name_element(doc->createElement("name")); + id_element->appendChild(name_element); + AutoPtr name(doc->createTextNode(*key_names.begin())); + name_element->appendChild(name); + } + else + { + const auto & children = dictionary_attributes->children; + if (children.size() < key_names.size()) + throw Exception( + "Primary key fields count is more, than dictionary attributes count.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + AutoPtr key_element(doc->createElement("key")); + root->appendChild(key_element); + for (const auto & key_name : key_names) + { + bool found = false; + for (const auto & attr : children) + { + const 
ASTDictionaryAttributeDeclaration * dict_attr = attr->as(); + if (dict_attr->name == key_name) + { + found = true; + buildSingleAttribute(doc, key_element, dict_attr); + break; + } + } + } + } +} + + +/** + * Transforms list of ASTDictionaryAttributeDeclarations to list of dictionary attributes + */ +void buildDictionaryAttributesConfiguration( + AutoPtr doc, + AutoPtr root, + const ASTExpressionList * dictionary_attributes, + const Names & key_columns) +{ + const auto & children = dictionary_attributes->children; + for (size_t i = 0; i < children.size(); ++i) + { + const ASTDictionaryAttributeDeclaration * dict_attr = children[i]->as(); + if (!dict_attr->type) + throw Exception("Dictionary attribute must has type", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (std::find(key_columns.begin(), key_columns.end(), dict_attr->name) == key_columns.end()) + buildSingleAttribute(doc, root, dict_attr); + + } +} + +/** Transform function with key-value arguments to configuration + * (used for source transformation) + */ +void buildConfigurationFromFunctionWithKeyValueArguments( + AutoPtr doc, + AutoPtr root, + const ASTExpressionList * ast_expr_list) +{ + const auto & children = ast_expr_list->children; + for (size_t i = 0; i != children.size(); ++i) + { + const ASTPair * pair = children[i]->as(); + AutoPtr current_xml_element(doc->createElement(pair->first)); + root->appendChild(current_xml_element); + + if (auto identifier = pair->second->as(); identifier) + { + AutoPtr value(doc->createTextNode(identifier->name)); + current_xml_element->appendChild(value); + } + else if (auto literal = pair->second->as(); literal) + { + String str_literal = applyVisitor(FieldVisitorToString(), literal->value); + AutoPtr value(doc->createTextNode(unescapeString(str_literal))); + current_xml_element->appendChild(value); + } + else if (auto list = pair->second->as(); list) + { + buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list); + } + else + { + 
throw Exception( + "Incorrect ASTPair contains wrong value, should be literal, identifier or list", + ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + } + } +} + +/** Build source definition from ast. + * SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA(HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) + * to + * + * + * localhost + * ... + * + * 127.0.0.1 + * ... + * + * + * + */ +void buildSourceConfiguration(AutoPtr doc, AutoPtr root, const ASTFunctionWithKeyValueArguments * source) +{ + AutoPtr outer_element(doc->createElement("source")); + root->appendChild(outer_element); + AutoPtr source_element(doc->createElement(source->name)); + outer_element->appendChild(source_element); + buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as()); +} + +void checkAST(const ASTCreateQuery & query) +{ + if (!query.is_dictionary || query.dictionary == nullptr) + throw Exception("Cannot convert dictionary to configuration from non-dictionary AST.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary_attributes_list == nullptr || query.dictionary_attributes_list->children.empty()) + throw Exception("Dictionary AST missing attributes list.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->layout == nullptr) + throw Exception("Cannot create dictionary with empty layout.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->lifetime == nullptr) + throw Exception("Dictionary AST missing lifetime section", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->primary_key == nullptr) + throw Exception("Dictionary AST missing primary key", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + if (query.dictionary->source == nullptr) + throw Exception("Dictionary AST missing source", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + + /// Range can be empty +} + +} + + +DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query) +{ + + 
checkAST(query); + + AutoPtr xml_document(new Poco::XML::Document()); + AutoPtr document_root(xml_document->createElement("dictionaries")); + xml_document->appendChild(document_root); + AutoPtr current_dictionary(xml_document->createElement("dictionary")); + document_root->appendChild(current_dictionary); + AutoPtr conf(new Poco::Util::XMLConfiguration()); + + AutoPtr name_element(xml_document->createElement("name")); + current_dictionary->appendChild(name_element); + AutoPtr name(xml_document->createTextNode(query.database + "." + query.table)); + name_element->appendChild(name); + + AutoPtr structure_element(xml_document->createElement("structure")); + current_dictionary->appendChild(structure_element); + Names pk_columns = getPrimaryKeyColumns(query.dictionary->primary_key); + auto dictionary_layout = query.dictionary->layout; + + bool complex = startsWith(dictionary_layout->layout_type, "complex"); + + buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_columns); + + buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_columns, query.dictionary_attributes_list); + + buildLayoutConfiguration(xml_document, current_dictionary, dictionary_layout); + buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source); + buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime); + + if (query.dictionary->range) + buildRangeConfiguration(xml_document, current_dictionary, query.dictionary->range); + + conf->load(xml_document); + return conf; +} + +} diff --git a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp new file mode 100644 index 00000000000..b2fbdf70479 --- /dev/null +++ b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include + +using namespace DB; + +/// For debug +std::string configurationToString(const DictionaryConfigurationPtr & config) +{ + const Poco::Util::XMLConfiguration * xml_config = dynamic_cast(config.get()); + std::ostringstream oss; + xml_config->save(oss); + return oss.str(); +} + +TEST(ConvertDictionaryAST, SimpleDictConfiguration) +{ + String input = " CREATE DICTIONARY test.dict1" + " (" + " key_column UInt64 DEFAULT 0," + " second_column UInt8 DEFAULT 1," + " third_column UInt8 DEFAULT 2" + " )" + " PRIMARY KEY key_column" + " SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'test' TABLE 'table_for_dict'))" + " LAYOUT(FLAT())" + " LIFETIME(MIN 1 MAX 10)" + " RANGE(MIN second_column MAX third_column)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + /// name + EXPECT_EQ(config->getString("dictionary.name"), "test.dict1"); + + /// lifetime + EXPECT_EQ(config->getInt("dictionary.lifetime.min"), 1); + EXPECT_EQ(config->getInt("dictionary.lifetime.max"), 10); + + /// range + EXPECT_EQ(config->getString("dictionary.range_min"), "second_column"); + EXPECT_EQ(config->getString("dictionary.range_max"), "third_column"); + + /// source + EXPECT_EQ(config->getString("dictionary.source.clickhouse.host"), "localhost"); + EXPECT_EQ(config->getInt("dictionary.source.clickhouse.port"), 9000); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.user"), "default"); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.password"), ""); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.db"), "test"); + EXPECT_EQ(config->getString("dictionary.source.clickhouse.table"), "table_for_dict"); + + /// attributes and key + Poco::Util::AbstractConfiguration::Keys keys; + 
config->keys("dictionary.structure", keys); + + EXPECT_EQ(keys.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".name"), "second_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure." + keys[0] + ".null_value"), 1); + + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure." + keys[1] + ".null_value"), 2); + + EXPECT_EQ(keys[2], "id"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[2] + ".name"), "key_column"); + + /// layout + EXPECT_TRUE(config->has("dictionary.layout.flat")); +} + + +TEST(ConvertDictionaryAST, TrickyAttributes) +{ + String input = " CREATE DICTIONARY dict2" + " (" + " key_column UInt64 IS_OBJECT_ID," + " second_column UInt8 HIERARCHICAL INJECTIVE," + " third_column UInt8 DEFAULT 2 EXPRESSION rand() % 100 * 77" + " )" + " PRIMARY KEY key_column" + " LAYOUT(hashed())" + " LIFETIME(MIN 1 MAX 10)" + " SOURCE(CLICKHOUSE(HOST 'localhost'))"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + Poco::Util::AbstractConfiguration::Keys keys; + config->keys("dictionary.structure", keys); + + EXPECT_EQ(keys.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".name"), "second_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".type"), "UInt8"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".null_value"), ""); + EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".hierarchical"), "true"); + EXPECT_EQ(config->getString("dictionary.structure." 
+ keys[0] + ".injective"), "true"); + + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".type"), "UInt8"); + EXPECT_EQ(config->getInt("dictionary.structure." + keys[1] + ".null_value"), 2); + EXPECT_EQ(config->getString("dictionary.structure." + keys[1] + ".expression"), "(rand() % 100) * 77"); + + EXPECT_EQ(keys[2], "id"); + EXPECT_EQ(config->getString("dictionary.structure." + keys[2] + ".name"), "key_column"); +} + + +TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) +{ + String input = " CREATE DICTIONARY dict4" + " (" + " key_column1 String," + " key_column2 UInt64," + " third_column UInt8," + " fourth_column UInt8" + " )" + " PRIMARY KEY key_column1, key_column2" + " SOURCE(MYSQL())" + " LAYOUT(COMPLEX_KEY_CACHE(size_in_cells 50))" + " LIFETIME(MIN 1 MAX 10)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + + Poco::Util::AbstractConfiguration::Keys keys; + config->keys("dictionary.structure.key", keys); + + EXPECT_EQ(keys.size(), 2); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[0] + ".name"), "key_column1"); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[0] + ".type"), "String"); + + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[1] + ".name"), "key_column2"); + EXPECT_EQ(config->getString("dictionary.structure.key." + keys[1] + ".type"), "UInt64"); + + Poco::Util::AbstractConfiguration::Keys attrs; + config->keys("dictionary.structure", attrs); + + EXPECT_EQ(attrs.size(), 3); + EXPECT_EQ(config->getString("dictionary.structure." + attrs[0] + ".name"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure." 
+ attrs[0] + ".type"), "UInt8"); + + EXPECT_EQ(config->getString("dictionary.structure." + attrs[1] + ".name"), "fourth_column"); + EXPECT_EQ(config->getString("dictionary.structure." + attrs[1] + ".type"), "UInt8"); + + EXPECT_EQ(attrs[2], "key"); + + EXPECT_EQ(config->getInt("dictionary.layout.complex_key_cache.size_in_cells"), 50); +} + + +TEST(ConvertDictionaryAST, ComplexSource) +{ + String input = " CREATE DICTIONARY dict4" + " (" + " key_column UInt64," + " second_column UInt8," + " third_column UInt8" + " )" + " PRIMARY KEY key_column" + " SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA(HOST '127.0.0.1' PRIORITY 1) PASSWORD ''))" + " LAYOUT(CACHE(size_in_cells 50))" + " LIFETIME(MIN 1 MAX 10)" + " RANGE(MIN second_column MAX third_column)"; + + ParserCreateDictionaryQuery parser; + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); + ASTCreateQuery * create = ast->as(); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + /// source + EXPECT_EQ(config->getString("dictionary.source.mysql.host"), "localhost"); + EXPECT_EQ(config->getInt("dictionary.source.mysql.port"), 9000); + EXPECT_EQ(config->getString("dictionary.source.mysql.user"), "default"); + EXPECT_EQ(config->getString("dictionary.source.mysql.password"), ""); + EXPECT_EQ(config->getString("dictionary.source.mysql.replica.host"), "127.0.0.1"); + EXPECT_EQ(config->getInt("dictionary.source.mysql.replica.priority"), 1); +} From 3e068b81de943af40a4b581d244686b64914274f Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 15 Oct 2019 17:09:57 +0300 Subject: [PATCH 017/122] Add loading of external dictionaries --- dbms/CMakeLists.txt | 4 +- dbms/src/Dictionaries/DictionaryFactory.cpp | 8 +-- dbms/src/Dictionaries/DictionaryFactory.h | 1 - .../getDictionaryConfigurationFromAST.cpp | 3 +- .../getDictionaryConfigurationFromAST.h | 14 +++++ dbms/src/Interpreters/Context.cpp | 3 + ...ExternalLoaderDatabaseConfigRepository.cpp | 55 
+++++++++++++++++++ .../ExternalLoaderDatabaseConfigRepository.h | 30 ++++++++++ 8 files changed, 109 insertions(+), 9 deletions(-) create mode 100644 dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h create mode 100644 dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp create mode 100644 dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index ec9ffc6e3dd..289fc939ce5 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -130,8 +130,8 @@ list (APPEND dbms_headers list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp) list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h) -list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp) -list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h) +list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp src/Dictionaries/getDictionaryConfigurationFromAST.cpp) +list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h src/Dictionaries/getDictionaryConfigurationFromAST.h) if (NOT ENABLE_SSL) list (REMOVE_ITEM clickhouse_common_io_sources src/Common/OpenSSLHelpers.cpp) diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index a9d0a55ea22..149eb1ddd15 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -3,6 +3,7 @@ #include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" +#include "getDictionaryConfigurationFromAST.h" namespace DB { @@ -52,11 
+53,8 @@ DictionaryPtr DictionaryFactory::create( DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const { - /// Temporary code for testing TODO(alesapin) - (void)(name); - (void)(ast); - (void)(context); - return nullptr; + auto configurationFromAST = getDictionaryConfigurationFromAST(ast); + return DictionaryFactory::create(name, *configurationFromAST, "dictionary", context); } diff --git a/dbms/src/Dictionaries/DictionaryFactory.h b/dbms/src/Dictionaries/DictionaryFactory.h index df769c57645..fd7978f590f 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.h +++ b/dbms/src/Dictionaries/DictionaryFactory.h @@ -34,7 +34,6 @@ public: const std::string & config_prefix, const Context & context) const; - /// Temporary method for testing TODO(alesapin) DictionaryPtr create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const; diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index a0afd91d2fa..fe6240ea2a0 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -1,4 +1,5 @@ #include + #include #include #include @@ -15,6 +16,7 @@ namespace DB { + namespace ErrorCodes { extern const int INCORRECT_DICTIONARY_DEFINITION; @@ -383,7 +385,6 @@ void checkAST(const ASTCreateQuery & query) DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query) { - checkAST(query); AutoPtr xml_document(new Poco::XML::Document()); diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h new file mode 100644 index 00000000000..fdc17d42678 --- /dev/null +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace DB +{ +using DictionaryConfigurationPtr = 
Poco::AutoPtr; + +/// Convert dictionary AST to Poco::AbstractConfiguration +/// This function is necessary because all loadable objects configuration are Poco::AbstractConfiguration +DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query); + +} diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 6636766259f..2ef47f5e0d6 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1034,6 +1035,8 @@ void Context::addDatabase(const String & database_name, const DatabasePtr & data assertDatabaseDoesntExist(database_name); shared->databases[database_name] = database; + auto dictionaries_repository = std::make_unique(database, *this); + getExternalDictionariesLoader().addConfigRepository(std::move(dictionaries_repository), {}); } diff --git a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp new file mode 100644 index 00000000000..43d9d8ca103 --- /dev/null +++ b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp @@ -0,0 +1,55 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_DICTIONARY; +} + +namespace +{ +String trimDatabaseName(const std::string & loadable_definition_name, const DatabasePtr database) +{ + const auto & dbname = database->getDatabaseName(); + if (!startsWith(loadable_definition_name, dbname)) + throw Exception( + "Loadable '" + loadable_definition_name + "' is not from database '" + database->getDatabaseName(), ErrorCodes::UNKNOWN_DICTIONARY); + /// dbname.loadable_name + ///--> remove <--- + return loadable_definition_name.substr(dbname.length()); +} +} + +LoadablesConfigurationPtr ExternalLoaderDatabaseConfigRepository::load(const std::string & loadable_definition_name) const +{ + String dictname = 
trimDatabaseName(loadable_definition_name, database); + return getDictionaryConfigurationFromAST(database->getCreateDictionaryQuery(context, dictname)->as()); +} + +bool ExternalLoaderDatabaseConfigRepository::exists(const std::string & loadable_definition_name) const +{ + return database->isDictionaryExist(context, trimDatabaseName(loadable_definition_name, database)); +} + +Poco::Timestamp ExternalLoaderDatabaseConfigRepository::getUpdateTime(const std::string & loadable_definition_name) +{ + return database->getObjectMetadataModificationTime(context, trimDatabaseName(loadable_definition_name, database)); +} + +std::set ExternalLoaderDatabaseConfigRepository::getAllLoadablesDefinitionNames() const +{ + std::set result; + const auto & dbname = database->getDatabaseName(); + auto itr = database->getDictionariesIterator(context); + while (itr && itr->isValid()) + { + result.insert(dbname + "." + itr->name()); + itr->next(); + } + return result; +} + +} diff --git a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h new file mode 100644 index 00000000000..4f48f882f82 --- /dev/null +++ b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +class ExternalLoaderDatabaseConfigRepository : public IExternalLoaderConfigRepository +{ +public: + ExternalLoaderDatabaseConfigRepository(const DatabasePtr & database_, const Context & context_) + : database(database_) + , context(context_) + { + } + + std::set getAllLoadablesDefinitionNames() const override; + + bool exists(const std::string & loadable_definition_name) const override; + + Poco::Timestamp getUpdateTime(const std::string & loadable_definition_name) override; + + LoadablesConfigurationPtr load(const std::string & loadable_definition_name) const override; + +private: + DatabasePtr database; + Context context; +}; +} From 
e690a3ca329944ec9795ab87b61409cd75d035d4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 15 Oct 2019 21:04:17 +0300 Subject: [PATCH 018/122] Half working code --- dbms/src/Databases/DatabaseOnDisk.cpp | 5 ++ dbms/src/Databases/DatabaseOrdinary.cpp | 14 ++++-- dbms/src/Databases/DatabaseOrdinary.h | 1 + .../getDictionaryConfigurationFromAST.cpp | 1 + dbms/src/Interpreters/Context.cpp | 3 +- .../ExternalDictionariesLoader.cpp | 2 +- dbms/src/Interpreters/ExternalLoader.cpp | 41 ++++++++++++----- dbms/src/Interpreters/ExternalLoader.h | 11 +++-- ...ExternalLoaderDatabaseConfigRepository.cpp | 10 +++- .../src/Interpreters/ExternalModelsLoader.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 8 ++++ .../src/Interpreters/InterpreterDropQuery.cpp | 1 + .../01018_ddl_dictionaries_create.reference | 4 +- .../01018_ddl_dictionaries_create.sql | 46 +++++++++++++------ 14 files changed, 109 insertions(+), 40 deletions(-) diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index f77d0476e88..4e0b93fbad7 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -308,6 +309,10 @@ void DatabaseOnDisk::createDictionary( Poco::File(dictionary_metadata_tmp_path).remove(); throw; } + + const auto & config = context.getConfigRef(); + context.getExternalDictionariesLoader().reload( + database.getDatabaseName() + "." 
+ dictionary_name, config.getBool("dictionaries_lazy_load", true)); } diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 98243f4964e..06c575473ab 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -160,9 +162,6 @@ void DatabaseOrdinary::loadStoredObjects( }); - if (file_names.empty()) - return; - size_t total_tables = file_names.size() - total_dictionaries; LOG_INFO(log, "Total " << total_tables << " tables and " << total_dictionaries << " dictionaries."); @@ -191,6 +190,7 @@ void DatabaseOrdinary::loadStoredObjects( /// After all tables was basically initialized, startup them. startupTables(pool); + loadDictionaries(context); } @@ -217,6 +217,14 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool) thread_pool.wait(); } +void DatabaseOrdinary::loadDictionaries(Context & context) +{ + LOG_INFO(log, "Loading dictionaries."); + + auto dictionaries_repository = std::make_unique(shared_from_this(), context); + context.getExternalDictionariesLoader().addConfigRepository(getDatabaseName(), std::move(dictionaries_repository), {}); +} + void DatabaseOrdinary::createTable( const Context & context, diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index 7a381489a1f..26309ff4b0b 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -92,6 +92,7 @@ private: Poco::Logger * log; void startupTables(ThreadPool & thread_pool); + void loadDictionaries(Context & context); }; } diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index fe6240ea2a0..9772cc17c13 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -359,6 +359,7 @@ void 
buildSourceConfiguration(AutoPtr doc, AutoPtr root, cons void checkAST(const ASTCreateQuery & query) { + std::cerr << queryToString(query) << std::endl; if (!query.is_dictionary || query.dictionary == nullptr) throw Exception("Cannot convert dictionary to configuration from non-dictionary AST.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 2ef47f5e0d6..23588275c3e 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1035,8 +1035,6 @@ void Context::addDatabase(const String & database_name, const DatabasePtr & data assertDatabaseDoesntExist(database_name); shared->databases[database_name] = database; - auto dictionaries_repository = std::make_unique(database, *this); - getExternalDictionariesLoader().addConfigRepository(std::move(dictionaries_repository), {}); } @@ -1045,6 +1043,7 @@ DatabasePtr Context::detachDatabase(const String & database_name) auto lock = getLock(); auto res = getDatabase(database_name); + getExternalDictionariesLoader().removeConfigRepository(database_name); shared->databases.erase(database_name); return res; } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 5e84da0a0cd..596db6b8960 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -11,7 +11,7 @@ ExternalDictionariesLoader::ExternalDictionariesLoader( : ExternalLoader("external dictionary", &Logger::get("ExternalDictionariesLoader")) , context(context_) { - addConfigRepository(std::move(config_repository), {"dictionary", "name"}); + addConfigRepository("_XMLConfigRepository", std::move(config_repository), {"dictionary", "name"}); enableAsyncLoading(true); enablePeriodicUpdates(true); } diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index b4c61f5a5da..4cb8959aa60 
100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -40,12 +40,19 @@ public: } ~LoadablesConfigReader() = default; - void addConfigRepository(std::unique_ptr repository, const ExternalLoaderConfigSettings & settings) + void addConfigRepository(const String & name, std::unique_ptr repository, const ExternalLoaderConfigSettings & settings) { std::lock_guard lock{mutex}; - repositories.emplace_back(std::move(repository), std::move(settings)); + repositories.emplace(name, std::make_pair(std::move(repository), settings)); } + void removeConfigRepository(const String & name) + { + std::lock_guard lock{mutex}; + repositories.erase(name); + } + + using ObjectConfigsPtr = std::shared_ptr>; /// Reads configuration files. @@ -100,22 +107,22 @@ private: loadable_info.in_use = false; } - for (const auto & [repository, settings] : repositories) + for (const auto & [name, repo_with_settings] : repositories) { - const auto names = repository->getAllLoadablesDefinitionNames(); + const auto names = repo_with_settings.first->getAllLoadablesDefinitionNames(); for (const auto & name : names) { auto it = loadables_infos.find(name); if (it != loadables_infos.end()) { LoadablesInfos & loadable_info = it->second; - if (readLoadablesInfo(*repository, name, settings, loadable_info)) + if (readLoadablesInfo(*repo_with_settings.first, name, repo_with_settings.second, loadable_info)) changed = true; } else { LoadablesInfos loadable_info; - if (readLoadablesInfo(*repository, name, settings, loadable_info)) + if (readLoadablesInfo(*repo_with_settings.first, name, repo_with_settings.second, loadable_info)) { loadables_infos.emplace(name, std::move(loadable_info)); changed = true; @@ -147,7 +154,7 @@ private: { if (path.empty() || !repository.exists(path)) { - LOG_WARNING(log, "config file '" + path + "' does not exist"); + LOG_WARNING(log, "Config file '" + path + "' does not exist"); return false; } @@ -203,7 +210,9 @@ private: Logger * log; 
std::mutex mutex; - std::vector, ExternalLoaderConfigSettings>> repositories; + using RepositoryPtr = std::unique_ptr; + using RepositoryWithSettings = std::pair; + std::unordered_map repositories; ObjectConfigsPtr configs; std::unordered_map loadables_infos; }; @@ -956,9 +965,17 @@ ExternalLoader::ExternalLoader(const String & type_name_, Logger * log) ExternalLoader::~ExternalLoader() = default; void ExternalLoader::addConfigRepository( - std::unique_ptr config_repository, const ExternalLoaderConfigSettings & config_settings) + const std::string & repository_name, + std::unique_ptr config_repository, + const ExternalLoaderConfigSettings & config_settings) { - config_files_reader->addConfigRepository(std::move(config_repository), config_settings); + config_files_reader->addConfigRepository(repository_name, std::move(config_repository), config_settings); + loading_dispatcher->setConfiguration(config_files_reader->read()); +} + +void ExternalLoader::removeConfigRepository(const std::string & repository_name) +{ + config_files_reader->removeConfigRepository(repository_name); loading_dispatcher->setConfiguration(config_files_reader->read()); } @@ -1040,13 +1057,13 @@ void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const return loading_dispatcher->load(loaded_objects, timeout); } -void ExternalLoader::reload(const String & name, bool load_never_loading) +void ExternalLoader::reload(const String & name, bool load_never_loading) const { loading_dispatcher->setConfiguration(config_files_reader->read()); loading_dispatcher->reload(name, load_never_loading); } -void ExternalLoader::reload(bool load_never_loading) +void ExternalLoader::reload(bool load_never_loading) const { loading_dispatcher->setConfiguration(config_files_reader->read()); loading_dispatcher->reload(load_never_loading); diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 5c1fd1c0416..cb86a614958 100644 --- 
a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -81,7 +81,12 @@ public: /// Adds a repository which will be used to read configurations from. void addConfigRepository( - std::unique_ptr config_repository, const ExternalLoaderConfigSettings & config_settings); + const std::string & repository_name, + std::unique_ptr config_repository, + const ExternalLoaderConfigSettings & config_settings); + + /// Removes a repository which were used to read configurations. + void removeConfigRepository(const std::string & repository_name); /// Sets whether all the objects from the configuration should be always loaded (even those which are never used). void enableAlwaysLoadEverything(bool enable); @@ -140,12 +145,12 @@ public: /// Starts reloading of a specified object. /// `load_never_loading` specifies what to do if the object has never been loading before. /// The function can either skip it (false) or load for the first time (true). - void reload(const String & name, bool load_never_loading = false); + void reload(const String & name, bool load_never_loading = false) const; /// Starts reloading of all the objects. /// `load_never_loading` specifies what to do with the objects which have never been loading before. /// The function can either skip them (false) or load for the first time (true). 
- void reload(bool load_never_loading = false); + void reload(bool load_never_loading = false) const; protected: virtual LoadablePtr create(const String & name, const Poco::Util::AbstractConfiguration & config, const String & key_in_config) const = 0; diff --git a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp index 43d9d8ca103..daed7f456a2 100644 --- a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp +++ b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp @@ -19,7 +19,7 @@ String trimDatabaseName(const std::string & loadable_definition_name, const Data "Loadable '" + loadable_definition_name + "' is not from database '" + database->getDatabaseName(), ErrorCodes::UNKNOWN_DICTIONARY); /// dbname.loadable_name ///--> remove <--- - return loadable_definition_name.substr(dbname.length()); + return loadable_definition_name.substr(dbname.length() + 1); } } @@ -31,7 +31,12 @@ LoadablesConfigurationPtr ExternalLoaderDatabaseConfigRepository::load(const std bool ExternalLoaderDatabaseConfigRepository::exists(const std::string & loadable_definition_name) const { - return database->isDictionaryExist(context, trimDatabaseName(loadable_definition_name, database)); + std::cerr << "IS EXISTS:" + << loadable_definition_name << std::endl; + std::cerr << "CUTTED:" + << trimDatabaseName(loadable_definition_name, database) << std::endl; + return database->isDictionaryExist( + context, trimDatabaseName(loadable_definition_name, database)); } Poco::Timestamp ExternalLoaderDatabaseConfigRepository::getUpdateTime(const std::string & loadable_definition_name) @@ -49,6 +54,7 @@ std::set ExternalLoaderDatabaseConfigRepository::getAllLoadablesDef result.insert(dbname + "." 
+ itr->name()); itr->next(); } + std::cerr << "RESULTSIZE:" << result.size() << std::endl; return result; } diff --git a/dbms/src/Interpreters/ExternalModelsLoader.cpp b/dbms/src/Interpreters/ExternalModelsLoader.cpp index 462e8110249..690b7d920ca 100644 --- a/dbms/src/Interpreters/ExternalModelsLoader.cpp +++ b/dbms/src/Interpreters/ExternalModelsLoader.cpp @@ -15,7 +15,7 @@ ExternalModelsLoader::ExternalModelsLoader( : ExternalLoader("external model", &Logger::get("ExternalModelsLoader")) , context(context_) { - addConfigRepository(std::move(config_repository), {"model", "name"}); + addConfigRepository("_XMLConfigRepository", std::move(config_repository), {"model", "name"}); enablePeriodicUpdates(true); } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 55b407f6052..0d2576477dd 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -161,6 +161,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) try { + std::cerr << "ADDING DB NAME:" << database_name << std::endl; context.addDatabase(database_name, database); if (need_write_metadata) @@ -721,6 +722,13 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create) "Dictionary " + database_name + "." 
+ dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); } + if (create.attach) + { + auto query = context.getCreateDictionaryQuery(database_name, dictionary_name); + create = query->as(); + create.attach = true; + } + auto res = DictionaryFactory::instance().create(dictionary_name, create, context.getGlobalContext()); if (create.attach) database->attachDictionary(dictionary_name, res); diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 3b8dd382ac7..5e78f4cffcd 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index 53d2a29fce2..d78a3337ba3 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -18,9 +18,7 @@ memory_db dict2 ==DETACH DICTIONARY 0 ==ATTACH DICTIONARY -dict2 -1 -memory_db dict2 +0 ==DROP DICTIONARY 0 =DICTIONARY in Dictionary DB diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 84f5f08a418..6720cb576b0 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -1,5 +1,22 @@ SET send_logs_level = 'none'; +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +CREATE TABLE database_for_dict.table_for_dict +( + key_column UInt64, + second_column UInt8, + third_column String +) +ENGINE = MergeTree() +ORDER BY key_column; + +INSERT INTO database_for_dict.table_for_dict VALUES 
(1, 100, 'Hello world'); + DROP DATABASE IF EXISTS ordinary_db; CREATE DATABASE ordinary_db ENGINE = Ordinary; @@ -15,7 +32,7 @@ CREATE DICTIONARY ordinary_db.dict1 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); @@ -65,12 +82,12 @@ SELECT '=DICTIONARY in Memory DB'; CREATE DICTIONARY memory_db.dict2 ( - key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, + key_column UInt64 DEFAULT 0 INJECTIVE, second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); @@ -93,7 +110,7 @@ SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; SELECT '==ATTACH DICTIONARY'; -ATTACH DICTIONARY memory_db.dict2; +ATTACH DICTIONARY memory_db.dict2; --{serverError 485} SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; @@ -121,14 +138,14 @@ SELECT '=DICTIONARY in Dictionary DB'; CREATE DICTIONARY dictionary_db.dict2 ( - key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, + key_column UInt64 DEFAULT 0 INJECTIVE, second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column, second_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) -LAYOUT(FLAT()); -- {serverError 1} +LAYOUT(COMPLEX_KEY_HASHED()); -- {serverError 1} DROP DATABASE IF EXISTS dictionary_db; @@ 
-140,18 +157,17 @@ CREATE DATABASE lazy_db ENGINE = Lazy(1); CREATE DICTIONARY lazy_db.dict3 ( - key_column UInt64 DEFAULT 0 INJECTIVE HIERARCHICAL, + key_column UInt64 DEFAULT 0 INJECTIVE, second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column, second_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) -LAYOUT(FLAT()); -- {serverError 1} +LAYOUT(COMPLEX_KEY_HASHED()); -- {serverError 1} DROP DATABASE IF EXISTS lazy_db; - SELECT '=DROP DATABASE WITH DICTIONARY'; DROP DATABASE IF EXISTS ordinary_db; @@ -165,7 +181,7 @@ CREATE DICTIONARY ordinary_db.dict4 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); @@ -184,10 +200,14 @@ CREATE DICTIONARY ordinary_db.dict4 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); SHOW DICTIONARIES FROM ordinary_db; DROP DATABASE IF EXISTS ordinary_db; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +DROP DATABASE IF EXISTS database_for_dict; From e6e88241fc08ce513ba0495e1c2ef72368f52b01 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 16 Oct 2019 17:47:58 +0300 Subject: [PATCH 019/122] SyntaxAnalyzer refactoring: better getTablesWithColumns() --- dbms/src/Interpreters/IdentifierSemantic.cpp | 22 ++++++ dbms/src/Interpreters/IdentifierSemantic.h | 1 
+ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 69 ++++++++++++------- .../TranslateQualifiedNamesVisitor.cpp | 16 +---- .../TranslateQualifiedNamesVisitor.h | 6 +- 5 files changed, 73 insertions(+), 41 deletions(-) diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 361462c0d1d..6b74dc2d8d1 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -57,6 +57,28 @@ size_t IdentifierSemantic::getMembership(const ASTIdentifier & identifier) return identifier.semantic->membership; } +bool IdentifierSemantic::trySetMembership(ASTIdentifier & identifier, const std::vector & tables, + size_t & best_table_pos) +{ + best_table_pos = 0; + size_t best_match = 0; + for (size_t i = 0; i < tables.size(); ++i) + if (size_t match = canReferColumnToTable(identifier, tables[i].first)) + if (match > best_match) + { + best_match = match; + best_table_pos = i; + } + + if (best_match) + { + setMembership(identifier, best_table_pos + 1); + return true; + } + + return false; +} + std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index b4bf87e7fef..4fde404488c 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -35,6 +35,7 @@ struct IdentifierSemantic static bool canBeAlias(const ASTIdentifier & identifier); static void setMembership(ASTIdentifier & identifier, size_t table_no); static size_t getMembership(const ASTIdentifier & identifier); + static bool trySetMembership(ASTIdentifier & identifier, const std::vector & tables, size_t & best_table_pos); private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp 
b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 228aea0b2f2..3d43b7f0b25 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -91,7 +91,10 @@ void collectSourceColumns(const ColumnsDescription & columns, NamesAndTypesList } } -std::vector getTablesWithColumns(const ASTSelectQuery & select_query, const Context & context) +std::vector getTablesWithColumns(const ASTSelectQuery & select_query, const Context & context, + const ASTTablesInSelectQueryElement * table_join_node, + NamesAndTypesList & columns_from_joined_table, + std::function get_column_names) { std::vector tables_with_columns = getDatabaseAndTablesWithColumnNames(select_query, context); @@ -104,6 +107,27 @@ std::vector getTablesWithColumns(const ASTSelectQuery & se ErrorCodes::ALIAS_REQUIRED); } + TableWithColumnNames joined_table; + + if (table_join_node) + { + const auto & joined_expression = table_join_node->table_expression->as(); + + columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context); + + joined_table.first = DatabaseAndTableWithAlias(joined_expression, context.getCurrentDatabase()); + for (const auto & column : columns_from_joined_table) + joined_table.second.push_back(column.name); + } + + /// If empty make table(s) with list of source and joined columns + if (tables_with_columns.empty()) + { + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, get_column_names()); + if (!joined_table.second.empty()) + tables_with_columns.emplace_back(std::move(joined_table)); + } + return tables_with_columns; } @@ -805,41 +829,34 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (remove_duplicates) renameDuplicatedColumns(select_query); - if (const ASTTablesInSelectQueryElement * node = select_query->join()) + const ASTTablesInSelectQueryElement * table_join_node = select_query->join(); + if (table_join_node) { if (!settings.any_join_distinct_right_table_keys) - checkJoin(node); + 
checkJoin(table_join_node); if (settings.enable_optimize_predicate_expression) - replaceJoinedTable(node); - - const auto & joined_expression = node->table_expression->as(); - DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase()); - - result.analyzed_join->columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context); - result.analyzed_join->deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix()); + replaceJoinedTable(table_join_node); } - auto tables_with_columns = getTablesWithColumns(*select_query, context); - - /// If empty make fake table with list of source and joined columns - if (tables_with_columns.empty()) + auto get_column_names = [&]() -> Names { - Names columns_list; if (storage) - columns_list = storage->getColumns().getOrdinary().getNames(); - else - { - columns_list.reserve(result.source_columns.size()); - for (const auto & column : result.source_columns) - columns_list.emplace_back(column.name); - } + return storage->getColumns().getOrdinary().getNames(); - for (auto & column : result.analyzed_join->getQualifiedColumnsSet()) - columns_list.emplace_back(column); + Names columns; + columns.reserve(result.source_columns.size()); + for (const auto & column : result.source_columns) + columns.push_back(column.name); + return columns; + }; - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(columns_list)); - } + auto tables_with_columns = getTablesWithColumns(*select_query, context, table_join_node, + result.analyzed_join->columns_from_joined_table, get_column_names); + + if (tables_with_columns.size() > 1) + result.analyzed_join->deduplicateAndQualifyColumnNames( + source_columns_set, tables_with_columns[1].first.getQualifiedNamePrefix()); translateQualifiedNames(query, *select_query, source_columns_set, std::move(tables_with_columns)); diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp 
b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 7ae98d3e9c8..e05387b5aa0 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -62,22 +62,12 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (IdentifierSemantic::getColumnName(identifier)) { size_t best_table_pos = 0; - size_t best_match = 0; - for (size_t i = 0; i < data.tables.size(); ++i) - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first)) - if (match > best_match) - { - best_match = match; - best_table_pos = i; - } - - if (best_match) - IdentifierSemantic::setMembership(identifier, best_table_pos + 1); + IdentifierSemantic::trySetMembership(identifier, data.tables, best_table_pos); /// In case if column from the joined table are in source columns, change it's name to qualified. - if (best_table_pos && data.source_columns.count(identifier.shortName())) + if (best_table_pos && data.hasColumn(identifier.shortName())) IdentifierSemantic::setNeedLongName(identifier, true); - if (!data.tables.empty()) + if (data.hasTable()) IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); } } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 4bf18b59cb9..b3718170dda 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -35,6 +35,10 @@ public: , has_columns(has_columns_) {} + bool hasColumn(const String & name) const { return source_columns.count(name); } + bool hasTable() const { return !tables.empty(); } + bool processAsterisks() const { return hasTable() && has_columns; } + static std::vector tablesOnly(const std::vector & tables) { std::vector tables_with_columns; @@ -44,8 +48,6 @@ public: tables_with_columns.emplace_back(TableWithColumnNames{table, {}}); 
return tables_with_columns; } - - bool processAsterisks() const { return !tables.empty() && has_columns; } }; static void visit(ASTPtr & ast, Data & data); From b4e0ded04803a3fe6e7ecd75dbe2bcc367555165 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 17:59:52 +0300 Subject: [PATCH 020/122] Almost working --- dbms/src/Databases/DatabaseDictionary.cpp | 13 ++----- dbms/src/Databases/DatabaseDictionary.h | 10 ++---- dbms/src/Databases/DatabaseLazy.cpp | 10 ++---- dbms/src/Databases/DatabaseLazy.h | 7 ++-- dbms/src/Databases/DatabaseMemory.cpp | 3 +- dbms/src/Databases/DatabaseMemory.h | 1 - dbms/src/Databases/DatabaseMySQL.h | 8 ++--- dbms/src/Databases/DatabaseOnDisk.cpp | 12 +++---- dbms/src/Databases/DatabaseOnDisk.h | 1 - dbms/src/Databases/DatabaseOrdinary.cpp | 23 +++++------- dbms/src/Databases/DatabaseOrdinary.h | 1 - dbms/src/Databases/DatabasesCommon.cpp | 35 ++++++------------- dbms/src/Databases/DatabasesCommon.h | 6 ++-- dbms/src/Databases/IDatabase.h | 15 +++----- dbms/src/Dictionaries/DictionaryFactory.cpp | 6 ++-- dbms/src/Dictionaries/IDictionary_fwd.h | 3 +- dbms/src/Interpreters/Context.cpp | 1 + dbms/src/Interpreters/ExternalLoader.cpp | 26 +++++++------- ...ExternalLoaderDatabaseConfigRepository.cpp | 5 --- .../Interpreters/InterpreterCreateQuery.cpp | 6 ++-- .../01018_ddl_dictionaries_create.reference | 2 +- 21 files changed, 63 insertions(+), 131 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index f76a3f0e67e..d498d8a3f86 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -73,13 +73,6 @@ bool DatabaseDictionary::isDictionaryExist( return false; } -DictionaryPtr DatabaseDictionary::tryGetDictionary( - const Context & /*context*/, - const String & /*dictionary_name*/) const -{ - return nullptr; -} - DatabaseDictionariesIteratorPtr DatabaseDictionary::getDictionariesIterator( const Context & /*context*/, @@ -92,7 
+85,6 @@ DatabaseDictionariesIteratorPtr DatabaseDictionary::getDictionariesIterator( void DatabaseDictionary::createDictionary( const Context & /*context*/, const String & /*dictionary_name*/, - const DictionaryPtr & /*dict_ptr*/, const ASTPtr & /*query*/) { throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); @@ -106,13 +98,12 @@ void DatabaseDictionary::removeDictionary( } void DatabaseDictionary::attachDictionary( - const String & /*dictionary_name*/, - const DictionaryPtr & /*table*/) + const String & /*dictionary_name*/) { throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); } -DictionaryPtr DatabaseDictionary::detachDictionary( +void DatabaseDictionary::detachDictionary( const String & /*dictionary_name*/) { throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index ee458ea64c4..e4b02412a43 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -41,13 +41,10 @@ public: bool isDictionaryExist(const Context & context, const String & table_name) const override; - StoragePtr tryGetTable( const Context & context, const String & table_name) const override; - DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) const override; - DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; @@ -61,7 +58,7 @@ public: const ASTPtr & query) override; void createDictionary( - const Context & context, const String & dictionary_name, const DictionaryPtr & dict_ptr, const ASTPtr & query) override; + const Context & context, const String & 
dictionary_name, const ASTPtr & query) override; void removeTable( const Context & context, @@ -92,10 +89,9 @@ public: ASTPtr tryGetCreateDictionaryQuery(const Context & context, const String & table_name) const override; - void attachDictionary(const String & dictionary_name, const DictionaryPtr & table) override; - - DictionaryPtr detachDictionary(const String & dictionary_name) override; + void attachDictionary(const String & dictionary_name) override; + void detachDictionary(const String & dictionary_name) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 75fd2ff2996..ed7c8ebcbf5 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -75,7 +75,6 @@ void DatabaseLazy::createTable( void DatabaseLazy::createDictionary( const Context & /*context*/, const String & /*dictionary_name*/, - const DictionaryPtr & /*dict_ptr*/, const ASTPtr & /*query*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); @@ -114,10 +113,6 @@ bool DatabaseLazy::isDictionaryExist(const Context & /*context*/, const String & return false; } -DictionaryPtr DatabaseLazy::tryGetDictionary(const Context & /*context*/, const String & /*dictionary_name*/) const -{ - return nullptr; -} DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( const Context & /*context*/, @@ -127,13 +122,12 @@ DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( } void DatabaseLazy::attachDictionary( - const String & /*dictionary_name*/, - const DictionaryPtr & /*table*/) + const String & /*dictionary_name*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } -DictionaryPtr DatabaseLazy::detachDictionary(const String & /*dictionary_name*/) +void DatabaseLazy::detachDictionary(const String & /*dictionary_name*/) { throw Exception("Lazy engine can be used only with *Log 
tables.", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index a8ef26c8c02..fdd4f18083a 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -35,7 +35,6 @@ public: void createDictionary( const Context & context, const String & dictionary_name, - const DictionaryPtr & dict_ptr, const ASTPtr & query) override; void removeTable( @@ -102,8 +101,6 @@ public: const Context & context, const String & table_name) const override; - DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) const override; - bool empty(const Context & context) const override; DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; @@ -114,9 +111,9 @@ public: StoragePtr detachTable(const String & table_name) override; - void attachDictionary(const String & dictionary_name, const DictionaryPtr & table) override; + void attachDictionary(const String & dictionary_name) override; - DictionaryPtr detachDictionary(const String & dictionary_name) override; + void detachDictionary(const String & dictionary_name) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 0fd4d04ec12..6d3379d9a0c 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -37,10 +37,9 @@ void DatabaseMemory::createTable( void DatabaseMemory::createDictionary( const Context & /*context*/, const String & dictionary_name, - const DictionaryPtr & dictionary, const ASTPtr & /*query*/) { - attachDictionary(dictionary_name, dictionary); + attachDictionary(dictionary_name); } diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index 6a52d02f949..d0ef4a47d25 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -36,7 +36,6 @@ public: 
void createDictionary( const Context & context, const String & dictionary_name, - const DictionaryPtr & dictionary, const ASTPtr & query) override; void removeTable( diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 7419cad3306..cf06a347f9a 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -44,8 +44,6 @@ public: StoragePtr tryGetTable(const Context & context, const String & name) const override; - DictionaryPtr tryGetDictionary(const Context &, const String &) const override { return {}; } - ASTPtr tryGetCreateTableQuery(const Context & context, const String & name) const override; ASTPtr getCreateDictionaryQuery(const Context &, const String &) const override @@ -65,7 +63,7 @@ public: throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED); } - DictionaryPtr detachDictionary(const String &) override + void detachDictionary(const String &) override { throw Exception("MySQL database engine does not support detach dictionary.", ErrorCodes::NOT_IMPLEMENTED); } @@ -91,7 +89,7 @@ public: throw Exception("MySQL database engine does not support attach table.", ErrorCodes::NOT_IMPLEMENTED); } - void attachDictionary(const String &, const DictionaryPtr &) override + void attachDictionary(const String &) override { throw Exception("MySQL database engine does not support attach dictionary.", ErrorCodes::NOT_IMPLEMENTED); } @@ -101,7 +99,7 @@ public: throw Exception("MySQL database engine does not support create table.", ErrorCodes::NOT_IMPLEMENTED); } - void createDictionary(const Context &, const String &, const DictionaryPtr &, const ASTPtr &) override + void createDictionary(const Context &, const String &, const ASTPtr &) override { throw Exception("MySQL database engine does not support create dictionary.", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 
4e0b93fbad7..a4b98e9eb47 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -255,7 +255,6 @@ void DatabaseOnDisk::createDictionary( IDatabase & database, const Context & context, const String & dictionary_name, - const DictionaryPtr & dictionary, const ASTPtr & query) { const auto & settings = context.getSettingsRef(); @@ -297,8 +296,7 @@ void DatabaseOnDisk::createDictionary( try { - /// Add a table to the map of known tables. - database.attachDictionary(dictionary_name, dictionary); + database.attachDictionary(dictionary_name); /// If it was ATTACH query and file with table metadata already exist /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. @@ -353,7 +351,7 @@ void DatabaseOnDisk::removeDictionary( const String & dictionary_name, Poco::Logger * log) { - DictionaryPtr res = database.detachDictionary(dictionary_name); + database.detachDictionary(dictionary_name); String dictionary_metadata_path = database.getObjectMetadataPath(dictionary_name); @@ -372,7 +370,7 @@ void DatabaseOnDisk::removeDictionary( { LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__)); } - database.attachDictionary(dictionary_name, res); + database.attachDictionary(dictionary_name); throw; } } @@ -417,9 +415,9 @@ ASTPtr DatabaseOnDisk::getCreateDictionaryQueryImpl( if (!ast && throw_on_error) { /// Handle system.* tables for which there are no table.sql files. - bool has_table = database.tryGetDictionary(context, dictionary_name) != nullptr; + bool has_dictionary = database.isDictionaryExist(context, dictionary_name); - auto msg = has_table ? "There is no CREATE DICTIONARY query for table " : "There is no metadata file for dictionary "; + auto msg = has_dictionary ? 
"There is no CREATE DICTIONARY query for table " : "There is no metadata file for dictionary "; throw Exception(msg + backQuote(dictionary_name), ErrorCodes::CANNOT_GET_CREATE_DICTIONARY_QUERY); } diff --git a/dbms/src/Databases/DatabaseOnDisk.h b/dbms/src/Databases/DatabaseOnDisk.h index 329771fa4e9..49910f67e11 100644 --- a/dbms/src/Databases/DatabaseOnDisk.h +++ b/dbms/src/Databases/DatabaseOnDisk.h @@ -51,7 +51,6 @@ public: IDatabase & database, const Context & context, const String & dictionary_name, - const DictionaryPtr & dictionary, const ASTPtr & query); static void removeTable( diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 06c575473ab..cabe0a943c2 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -55,21 +55,16 @@ namespace { -std::pair createDictionaryFromAST( +String createDictionaryFromAST( ASTCreateQuery ast_create_query, - const String & database_name, - const Context & context) + const String & database_name) { ast_create_query.database = database_name; if (!ast_create_query.dictionary_attributes_list) throw Exception("Missing definition of dictionary attributes.", ErrorCodes::EMPTY_LIST_OF_ATTRIBUTES_PASSED); - return - { - ast_create_query.table, - DictionaryFactory::instance().create(ast_create_query.table, ast_create_query, context) - }; + return ast_create_query.table; } void loadObject( @@ -83,10 +78,8 @@ try { if (query.is_dictionary) { - String dictionary_name; - DictionaryPtr dictionary; - std::tie(dictionary_name, dictionary) = createDictionaryFromAST(query, database_name, context); - database.attachDictionary(dictionary_name, dictionary); + String dictionary_name = createDictionaryFromAST(query, database_name); + database.attachDictionary(dictionary_name); } else { @@ -222,7 +215,8 @@ void DatabaseOrdinary::loadDictionaries(Context & context) LOG_INFO(log, "Loading dictionaries."); auto dictionaries_repository = 
std::make_unique(shared_from_this(), context); - context.getExternalDictionariesLoader().addConfigRepository(getDatabaseName(), std::move(dictionaries_repository), {}); + context.getExternalDictionariesLoader().addConfigRepository( + getDatabaseName(), std::move(dictionaries_repository), {"dictionary", "name"}); } @@ -238,10 +232,9 @@ void DatabaseOrdinary::createTable( void DatabaseOrdinary::createDictionary( const Context & context, const String & dictionary_name, - const DictionaryPtr & dictionary, const ASTPtr & query) { - DatabaseOnDisk::createDictionary(*this, context, dictionary_name, dictionary, query); + DatabaseOnDisk::createDictionary(*this, context, dictionary_name, query); } void DatabaseOrdinary::removeTable( diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index 26309ff4b0b..768e1bdcd32 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -31,7 +31,6 @@ public: void createDictionary( const Context & context, const String & dictionary_name, - const DictionaryPtr & dict_ptr, const ASTPtr & query) override; void removeTable( diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index d7485e0b91b..ef5587274ca 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -55,16 +55,6 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable( return it->second; } -DictionaryPtr DatabaseWithOwnTablesBase::tryGetDictionary(const Context & /*context*/, const String & dictionary_name) const -{ - std::lock_guard dict_lock{mutex}; - auto it = dictionaries.find(dictionary_name); - if (it == dictionaries.end()) - return {}; - - return it->second; -} - DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(mutex); @@ -85,9 +75,9 @@ DatabaseDictionariesIteratorPtr 
DatabaseWithOwnTablesBase::getDictionariesIterat return std::make_unique(dictionaries); Dictionaries filtered_dictionaries; - for (const auto & [dictionary_name, dictionary] : dictionaries) + for (const auto & dictionary_name : dictionaries) if (filter_by_dictionary_name(dictionary_name)) - filtered_dictionaries.emplace(dictionary_name, dictionary); + filtered_dictionaries.emplace(dictionary_name); return std::make_unique(std::move(filtered_dictionaries)); } @@ -112,19 +102,14 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) return res; } -DictionaryPtr DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name) +void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name) { - DictionaryPtr res; - { - std::lock_guard lock(mutex); - auto it = dictionaries.find(dictionary_name); - if (it == dictionaries.end()) - throw Exception("Dictionary " + name + "." + dictionary_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); - res = it->second; - dictionaries.erase(it); - } + std::lock_guard lock(mutex); + auto it = dictionaries.find(dictionary_name); + if (it == dictionaries.end()) + throw Exception("Dictionary " + name + "." + dictionary_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); + dictionaries.erase(it); - return res; } void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const StoragePtr & table) @@ -135,10 +120,10 @@ void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const Sto } -void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, const DictionaryPtr & dictionary) +void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name) { std::lock_guard lock(mutex); - if (!dictionaries.emplace(dictionary_name, dictionary).second) + if (!dictionaries.emplace(dictionary_name).second) throw Exception("Dictionary " + name + "." 
+ dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); } diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index 02ff2cd8483..83e16ffcd3f 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -29,17 +29,15 @@ public: const Context & context, const String & table_name) const override; - DictionaryPtr tryGetDictionary(const Context & context, const String & dictionary_name) const override; - bool empty(const Context & context) const override; void attachTable(const String & table_name, const StoragePtr & table) override; - void attachDictionary(const String & name, const DictionaryPtr & dictionary) override; + void attachDictionary(const String & name) override; StoragePtr detachTable(const String & table_name) override; - DictionaryPtr detachDictionary(const String & name) override; + void detachDictionary(const String & name) override; DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 690518727c3..d55ab4eb748 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -20,6 +20,7 @@ struct ConstraintsDescription; class ColumnsDescription; struct IndicesDescription; struct TableStructureWriteLockHolder; +using Dictionaries = std::set; namespace ErrorCodes { @@ -76,9 +77,7 @@ public: bool isValid() const { return !dictionaries.empty() && it != dictionaries.end(); } - const String & name() const { return it->first; } - - DictionaryPtr & dictionary() const { return it->second; } + const String & name() const { return *it; } }; using DatabaseTablesIteratorPtr = std::unique_ptr; @@ -121,11 +120,6 @@ public: const Context & context, const String & name) const = 0; - /// Get the dictionary for work. Return nullptr if there is no table. 
- virtual DictionaryPtr tryGetDictionary( - const Context & context, - const String & name) const = 0; - using FilterByNameFunction = std::function; /// Get an iterator that allows you to pass through all the tables. @@ -149,7 +143,6 @@ public: virtual void createDictionary( const Context & context, const String & dictionary_name, - const DictionaryPtr & dict_ptr, const ASTPtr & query) = 0; /// Delete the table from the database. Delete the metadata. @@ -166,13 +159,13 @@ public: virtual void attachTable(const String & name, const StoragePtr & table) = 0; /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method. - virtual void attachDictionary(const String & name, const DictionaryPtr & dictionary) = 0; + virtual void attachDictionary(const String & name) = 0; /// Forget about the table without deleting it, and return it. The database may not support this method. virtual StoragePtr detachTable(const String & name) = 0; /// Forget about the dictionary without deleting it, and return it. The database may not support this method. - virtual DictionaryPtr detachDictionary(const String & name) = 0; + virtual void detachDictionary(const String & name) = 0; /// Rename the table and possibly move the table to another database. 
virtual void renameTable( diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index 149eb1ddd15..b51974b5ae6 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -35,7 +35,7 @@ DictionaryPtr DictionaryFactory::create( const DictionaryStructure dict_struct{config, config_prefix + ".structure"}; - auto source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context); + DictionarySourcePtr source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context); const auto & layout_type = keys.front(); @@ -43,8 +43,8 @@ DictionaryPtr DictionaryFactory::create( const auto found = registered_layouts.find(layout_type); if (found != registered_layouts.end()) { - const auto & create_layout = found->second; - return create_layout(name, dict_struct, config, config_prefix, std::move(source_ptr)); + const auto & layout_creator = found->second; + return layout_creator(name, dict_struct, config, config_prefix, std::move(source_ptr)); } } diff --git a/dbms/src/Dictionaries/IDictionary_fwd.h b/dbms/src/Dictionaries/IDictionary_fwd.h index 362d85fa26d..864dad217b6 100644 --- a/dbms/src/Dictionaries/IDictionary_fwd.h +++ b/dbms/src/Dictionaries/IDictionary_fwd.h @@ -2,7 +2,7 @@ #include -#include +#include #include namespace DB @@ -10,6 +10,5 @@ namespace DB struct IDictionaryBase; using DictionaryPtr = std::shared_ptr; -using Dictionaries = std::map; } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 23588275c3e..42b3bf1d4d2 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 4cb8959aa60..b9edfb28d6a 100644 --- 
a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -55,7 +55,7 @@ public: using ObjectConfigsPtr = std::shared_ptr>; - /// Reads configuration files. + /// Reads configurations. ObjectConfigsPtr read() { std::lock_guard lock{mutex}; @@ -89,7 +89,7 @@ public: } private: - struct LoadablesInfos + struct LoadablesInfos { Poco::Timestamp last_update_time = 0; std::vector> configs; // Parsed file's contents. @@ -146,19 +146,19 @@ private: bool readLoadablesInfo( IExternalLoaderConfigRepository & repository, - const String & path, + const String & name, const ExternalLoaderConfigSettings & settings, LoadablesInfos & loadable_info) const { try { - if (path.empty() || !repository.exists(path)) + if (name.empty() || !repository.exists(name)) { - LOG_WARNING(log, "Config file '" + path + "' does not exist"); + LOG_WARNING(log, "Config file '" + name + "' does not exist"); return false; } - auto update_time_from_repository = repository.getUpdateTime(path); + auto update_time_from_repository = repository.getUpdateTime(name); /// Actually it can't be less, but for sure we check less or equal if (update_time_from_repository <= loadable_info.last_update_time) @@ -167,7 +167,7 @@ private: return false; } - auto file_contents = repository.load(path); + auto file_contents = repository.load(name); /// get all objects' definitions Poco::Util::AbstractConfiguration::Keys keys; @@ -180,18 +180,18 @@ private: if (!startsWith(key, settings.external_config)) { if (!startsWith(key, "comment") && !startsWith(key, "include_from")) - LOG_WARNING(log, path << ": file contains unknown node '" << key << "', expected '" << settings.external_config << "'"); + LOG_WARNING(log, name << ": file contains unknown node '" << key << "', expected '" << settings.external_config << "'"); continue; } - String name = file_contents->getString(key + "." + settings.external_name); - if (name.empty()) + String external_name = file_contents->getString(key + "." 
+ settings.external_name); + if (external_name.empty()) { - LOG_WARNING(log, path << ": node '" << key << "' defines " << type_name << " with an empty name. It's not allowed"); + LOG_WARNING(log, name << ": node '" << key << "' defines " << type_name << " with an empty name. It's not allowed"); continue; } - configs_from_file.emplace_back(name, ObjectConfig{path, file_contents, key}); + configs_from_file.emplace_back(name, ObjectConfig{external_name, file_contents, key}); } loadable_info.configs = std::move(configs_from_file); @@ -201,7 +201,7 @@ private: } catch (...) { - tryLogCurrentException(log, "Failed to read config file '" + path + "'"); + tryLogCurrentException(log, "Failed to load config for dictionary '" + name + "'"); return false; } } diff --git a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp index daed7f456a2..bd89f27def1 100644 --- a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp +++ b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.cpp @@ -31,10 +31,6 @@ LoadablesConfigurationPtr ExternalLoaderDatabaseConfigRepository::load(const std bool ExternalLoaderDatabaseConfigRepository::exists(const std::string & loadable_definition_name) const { - std::cerr << "IS EXISTS:" - << loadable_definition_name << std::endl; - std::cerr << "CUTTED:" - << trimDatabaseName(loadable_definition_name, database) << std::endl; return database->isDictionaryExist( context, trimDatabaseName(loadable_definition_name, database)); } @@ -54,7 +50,6 @@ std::set ExternalLoaderDatabaseConfigRepository::getAllLoadablesDef result.insert(dbname + "." 
+ itr->name()); itr->next(); } - std::cerr << "RESULTSIZE:" << result.size() << std::endl; return result; } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 0d2576477dd..3d222029a27 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -161,7 +161,6 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) try { - std::cerr << "ADDING DB NAME:" << database_name << std::endl; context.addDatabase(database_name, database); if (need_write_metadata) @@ -729,11 +728,10 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create) create.attach = true; } - auto res = DictionaryFactory::instance().create(dictionary_name, create, context.getGlobalContext()); if (create.attach) - database->attachDictionary(dictionary_name, res); + database->attachDictionary(dictionary_name); else - database->createDictionary(context, dictionary_name, res, query_ptr); + database->createDictionary(context, dictionary_name, query_ptr); return {}; } diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index d78a3337ba3..ecf1730ca1c 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY ordinary_db.dict1 (`key_column` UInt64 DEFAULT 0, `second_column` UInt8 DEFAULT 1, `third_column` String DEFAULT \'qqq\') PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) +CREATE DICTIONARY ordinary_db.dict1 (`key_column` UInt64 DEFAULT 0, `second_column` UInt8 DEFAULT 1, `third_column` String DEFAULT \'qqq\') PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 
\'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict\')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) dict1 1 ordinary_db dict1 From 01e23f0a22979e272cdb2bd870b9c9eedbd41e24 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 20:06:52 +0300 Subject: [PATCH 021/122] Better load of dictionaries --- dbms/src/Databases/DatabaseDictionary.cpp | 4 +-- dbms/src/Databases/DatabaseDictionary.h | 4 +-- dbms/src/Databases/DatabaseLazy.cpp | 6 ++-- dbms/src/Databases/DatabaseLazy.h | 4 +-- dbms/src/Databases/DatabaseMemory.cpp | 8 ++--- dbms/src/Databases/DatabaseMySQL.h | 4 +-- dbms/src/Databases/DatabaseOnDisk.cpp | 21 +++++------- dbms/src/Databases/DatabaseOrdinary.cpp | 18 ++++------ dbms/src/Databases/DatabaseOrdinary.h | 1 - dbms/src/Databases/DatabasesCommon.cpp | 34 +++++++++++++------ dbms/src/Databases/DatabasesCommon.h | 4 +-- dbms/src/Databases/IDatabase.h | 4 +-- dbms/src/Interpreters/ExternalLoader.cpp | 14 ++++---- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- .../src/Interpreters/InterpreterDropQuery.cpp | 2 +- .../01018_ddl_dictionaries_create.sql | 12 +++---- 16 files changed, 74 insertions(+), 68 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index d498d8a3f86..0e997f8d914 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -98,13 +98,13 @@ void DatabaseDictionary::removeDictionary( } void DatabaseDictionary::attachDictionary( - const String & /*dictionary_name*/) + const String & /*dictionary_name*/, const Context & /*context*/, bool /*load*/) { throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); } void DatabaseDictionary::detachDictionary( - const String & /*dictionary_name*/) + const String & /*dictionary_name*/, const Context & /*context*/) { throw Exception("Dictionary engine doesn't support dictionaries.", 
ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index e4b02412a43..7607ee61c0d 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -89,9 +89,9 @@ public: ASTPtr tryGetCreateDictionaryQuery(const Context & context, const String & table_name) const override; - void attachDictionary(const String & dictionary_name) override; + void attachDictionary(const String & dictionary_name, const Context & context, bool load) override; - void detachDictionary(const String & dictionary_name) override; + void detachDictionary(const String & dictionary_name, const Context & context) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index ed7c8ebcbf5..022509fbcd8 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -122,12 +122,14 @@ DatabaseDictionariesIteratorPtr DatabaseLazy::getDictionariesIterator( } void DatabaseLazy::attachDictionary( - const String & /*dictionary_name*/) + const String & /*dictionary_name*/, + const Context & /*context*/, + bool /*load*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } -void DatabaseLazy::detachDictionary(const String & /*dictionary_name*/) +void DatabaseLazy::detachDictionary(const String & /*dictionary_name*/, const Context & /*context*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index fdd4f18083a..19d0d4cb51f 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -111,9 +111,9 @@ public: StoragePtr detachTable(const String & table_name) override; - void attachDictionary(const String & dictionary_name) override; + void attachDictionary(const String & dictionary_name, 
const Context & context, bool load) override; - void detachDictionary(const String & dictionary_name) override; + void detachDictionary(const String & dictionary_name, const Context & context) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 6d3379d9a0c..318709903c6 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -35,11 +35,11 @@ void DatabaseMemory::createTable( void DatabaseMemory::createDictionary( - const Context & /*context*/, + const Context & context, const String & dictionary_name, const ASTPtr & /*query*/) { - attachDictionary(dictionary_name); + attachDictionary(dictionary_name, context, true); } @@ -52,10 +52,10 @@ void DatabaseMemory::removeTable( void DatabaseMemory::removeDictionary( - const Context &, + const Context & context, const String & dictionary_name) { - detachDictionary(dictionary_name); + detachDictionary(dictionary_name, context); } diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index cf06a347f9a..938d7c4ec77 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -63,7 +63,7 @@ public: throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED); } - void detachDictionary(const String &) override + void detachDictionary(const String &, const Context &) override { throw Exception("MySQL database engine does not support detach dictionary.", ErrorCodes::NOT_IMPLEMENTED); } @@ -89,7 +89,7 @@ public: throw Exception("MySQL database engine does not support attach table.", ErrorCodes::NOT_IMPLEMENTED); } - void attachDictionary(const String &) override + void attachDictionary(const String &, const Context &, bool) override { throw Exception("MySQL database engine does not support attach dictionary.", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp 
b/dbms/src/Databases/DatabaseOnDisk.cpp index a4b98e9eb47..d04321f8357 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -296,21 +296,22 @@ void DatabaseOnDisk::createDictionary( try { - database.attachDictionary(dictionary_name); + /// Do not load it now + database.attachDictionary(dictionary_name, context, false); /// If it was ATTACH query and file with table metadata already exist /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. Poco::File(dictionary_metadata_tmp_path).renameTo(dictionary_metadata_path); + + /// Load dictionary + bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); + context.getExternalDictionariesLoader().reload(database.getDatabaseName() + "." + dictionary_name, !lazy_load); } catch (...) { Poco::File(dictionary_metadata_tmp_path).remove(); throw; } - - const auto & config = context.getConfigRef(); - context.getExternalDictionariesLoader().reload( - database.getDatabaseName() + "." 
+ dictionary_name, config.getBool("dictionaries_lazy_load", true)); } @@ -347,11 +348,11 @@ void DatabaseOnDisk::removeTable( void DatabaseOnDisk::removeDictionary( IDatabase & database, - const Context & /*context*/, + const Context & context, const String & dictionary_name, Poco::Logger * log) { - database.detachDictionary(dictionary_name); + database.detachDictionary(dictionary_name, context); String dictionary_metadata_path = database.getObjectMetadataPath(dictionary_name); @@ -370,7 +371,7 @@ void DatabaseOnDisk::removeDictionary( { LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__)); } - database.attachDictionary(dictionary_name); + database.attachDictionary(dictionary_name, context); throw; } } @@ -482,13 +483,9 @@ time_t DatabaseOnDisk::getObjectMetadataModificationTime( Poco::File meta_file(table_metadata_path); if (meta_file.exists()) - { return meta_file.getLastModified().epochTime(); - } else - { return static_cast(0); - } } void DatabaseOnDisk::iterateMetadataFiles(const IDatabase & database, Poco::Logger * log, const IteratingFunction & iterating_function) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index cabe0a943c2..52fd477ef0e 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -79,7 +79,7 @@ try if (query.is_dictionary) { String dictionary_name = createDictionaryFromAST(query, database_name); - database.attachDictionary(dictionary_name); + database.attachDictionary(dictionary_name, context, false); } else { @@ -125,6 +125,7 @@ void DatabaseOrdinary::loadStoredObjects( Context & context, bool has_force_restore_data_flag) { + /** Tables load faster if they are loaded in sorted (by name) order. * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, * which does not correspond to order tables creation and does not correspond to order of their location on disk. 
@@ -183,7 +184,10 @@ void DatabaseOrdinary::loadStoredObjects( /// After all tables was basically initialized, startup them. startupTables(pool); - loadDictionaries(context); + /// Add database as repository + auto dictionaries_repository = std::make_unique(shared_from_this(), context); + context.getExternalDictionariesLoader().addConfigRepository( + getDatabaseName(), std::move(dictionaries_repository), {"dictionary", "name"}); } @@ -210,16 +214,6 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool) thread_pool.wait(); } -void DatabaseOrdinary::loadDictionaries(Context & context) -{ - LOG_INFO(log, "Loading dictionaries."); - - auto dictionaries_repository = std::make_unique(shared_from_this(), context); - context.getExternalDictionariesLoader().addConfigRepository( - getDatabaseName(), std::move(dictionaries_repository), {"dictionary", "name"}); -} - - void DatabaseOrdinary::createTable( const Context & context, const String & table_name, diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index 768e1bdcd32..8046b7482b8 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -91,7 +91,6 @@ private: Poco::Logger * log; void startupTables(ThreadPool & thread_pool); - void loadDictionaries(Context & context); }; } diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index ef5587274ca..84adba87604 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -102,13 +104,17 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) return res; } -void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name) +void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name, const Context & context) { - std::lock_guard lock(mutex); - auto it = 
dictionaries.find(dictionary_name); - if (it == dictionaries.end()) - throw Exception("Dictionary " + name + "." + dictionary_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); - dictionaries.erase(it); + { + std::lock_guard lock(mutex); + auto it = dictionaries.find(dictionary_name); + if (it == dictionaries.end()) + throw Exception("Dictionary " + name + "." + dictionary_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); + dictionaries.erase(it); + } + + context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, true); } @@ -120,11 +126,19 @@ void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const Sto } -void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name) +void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, const Context & context, bool load) { - std::lock_guard lock(mutex); - if (!dictionaries.emplace(dictionary_name).second) - throw Exception("Dictionary " + name + "." + dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); + { + std::lock_guard lock(mutex); + if (!dictionaries.emplace(dictionary_name).second) + throw Exception("Dictionary " + name + "." + dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); + } + + if (load) + { + bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); + context.getExternalDictionariesLoader().reload(getDatabaseName() + "." 
+ dictionary_name, !lazy_load); + } } void DatabaseWithOwnTablesBase::shutdown() diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index 83e16ffcd3f..aec660cf945 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -33,11 +33,11 @@ public: void attachTable(const String & table_name, const StoragePtr & table) override; - void attachDictionary(const String & name) override; + void attachDictionary(const String & name, const Context & context, bool load) override; StoragePtr detachTable(const String & table_name) override; - void detachDictionary(const String & name) override; + void detachDictionary(const String & name, const Context & context) override; DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index d55ab4eb748..d3147a36991 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -159,13 +159,13 @@ public: virtual void attachTable(const String & name, const StoragePtr & table) = 0; /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method. - virtual void attachDictionary(const String & name) = 0; + virtual void attachDictionary(const String & name, const Context & context, bool load=true) = 0; /// Forget about the table without deleting it, and return it. The database may not support this method. virtual StoragePtr detachTable(const String & name) = 0; /// Forget about the dictionary without deleting it, and return it. The database may not support this method. - virtual void detachDictionary(const String & name) = 0; + virtual void detachDictionary(const String & name, const Context & context) = 0; /// Rename the table and possibly move the table to another database. 
virtual void renameTable( diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index b9edfb28d6a..71e04091e3a 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -112,8 +112,8 @@ private: const auto names = repo_with_settings.first->getAllLoadablesDefinitionNames(); for (const auto & name : names) { - auto it = loadables_infos.find(name); - if (it != loadables_infos.end()) + auto it = loadables_infos.find(name); + if (it != loadables_infos.end()) { LoadablesInfos & loadable_info = it->second; if (readLoadablesInfo(*repo_with_settings.first, name, repo_with_settings.second, loadable_info)) @@ -131,14 +131,14 @@ private: } } - std::vector deleted_files; + std::vector deleted_names; for (auto & [path, loadable_info] : loadables_infos) if (!loadable_info.in_use) - deleted_files.emplace_back(path); - if (!deleted_files.empty()) + deleted_names.emplace_back(path); + if (!deleted_names.empty()) { - for (const String & deleted_file : deleted_files) - loadables_infos.erase(deleted_file); + for (const String & deleted_name : deleted_names) + loadables_infos.erase(deleted_name); changed = true; } return changed; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 3d222029a27..fc884beff07 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -729,7 +729,7 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create) } if (create.attach) - database->attachDictionary(dictionary_name); + database->attachDictionary(dictionary_name, context); else database->createDictionary(context, dictionary_name, query_ptr); diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 5e78f4cffcd..3b1d80ee35f 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ 
b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -179,7 +179,7 @@ BlockIO InterpreterDropQuery::executeToDictionary( if (kind == ASTDropQuery::Kind::Detach) { /// Drop dictionary from memory, don't touch data and metadata - database->detachDictionary(dictionary_name); + database->detachDictionary(dictionary_name, context); } else if (kind == ASTDropQuery::Kind::Truncate) { diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 6720cb576b0..f5499f21bfe 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -27,9 +27,9 @@ DROP DICTIONARY IF EXISTS ordinary_db.dict1; CREATE DICTIONARY ordinary_db.dict1 ( - key_column UInt64 DEFAULT 0, - second_column UInt8 DEFAULT 1, - third_column String DEFAULT 'qqq' + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) @@ -138,9 +138,9 @@ SELECT '=DICTIONARY in Dictionary DB'; CREATE DICTIONARY dictionary_db.dict2 ( - key_column UInt64 DEFAULT 0 INJECTIVE, - second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, - third_column String DEFAULT 'qqq' + key_column UInt64 DEFAULT 0 INJECTIVE, + second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, + third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column, second_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) From fa557ab80da02d156201fb211a0a5c93f903ebad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 20:11:19 +0300 Subject: [PATCH 022/122] Better tests --- .../01018_ddl_dictionaries_select.reference | 8 ++ .../01018_ddl_dictionaries_select.sql | 75 +++++++++++++++++++ 2 files changed, 83 insertions(+) create 
mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference new file mode 100644 index 00000000000..ea65974b190 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -0,0 +1,8 @@ +11 +144 +7 +17 +11 +11 +7 +11 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql new file mode 100644 index 00000000000..b2a56f5e824 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -0,0 +1,75 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +CREATE TABLE database_for_dict.table_for_dict +( + key_column UInt64, + second_column UInt8, + third_column String, + fourth_column Float64 +) +ENGINE = MergeTree() +ORDER BY key_column; + +INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); + +DROP DICTIONARY IF EXISTS database_for_dict.dict1; + +CREATE DICTIONARY database_for_dict.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT dictGetString('database_for_dict.dict1', 'third_column', toUInt64(12)); +SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', toUInt64(14)); + +select 
count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); + +DETACH DICTIONARY database_for_dict.dict1; + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} + +ATTACH DICTIONARY database_for_dict.dict1; + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); + +DROP DICTIONARY database_for_dict.dict1; + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} + +CREATE DICTIONARY database_for_dict.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column, third_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 50)); + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); +SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', tuple(toUInt64(14), '196')); + +DETACH DICTIONARY database_for_dict.dict1; + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); -- {serverError 36} + +ATTACH DICTIONARY database_for_dict.dict1; + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); + +DROP DATABASE IF EXISTS database_for_dict; + From 51de7a1cd1c30ca4a717bfa3c3a971fa86cff5f3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 20:12:53 +0300 Subject: [PATCH 023/122] Better tests --- .../queries/0_stateless/01018_ddl_dictionaries_select.sql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index b2a56f5e824..eb01d917594 100644 --- 
a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -56,9 +56,9 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column, third_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) -LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 50)); +LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1)); SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', tuple(toUInt64(14), '196')); @@ -72,4 +72,3 @@ ATTACH DICTIONARY database_for_dict.dict1; SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), '121')); DROP DATABASE IF EXISTS database_for_dict; - From 483108f46f7e9557e5df0cdcecbe578bd0b80c98 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 16 Oct 2019 20:33:53 +0300 Subject: [PATCH 024/122] column to table matching refactoring --- .../Interpreters/CrossToInnerJoinVisitor.cpp | 55 ++++++------------ .../FindIdentifierBestTableVisitor.cpp | 14 +---- dbms/src/Interpreters/IdentifierSemantic.cpp | 57 +++++++++++++------ dbms/src/Interpreters/IdentifierSemantic.h | 3 +- .../JoinToSubqueryTransformVisitor.cpp | 8 ++- .../TranslateQualifiedNamesVisitor.cpp | 3 +- 6 files changed, 70 insertions(+), 70 deletions(-) diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index 52cb05d35ea..5ac28bc6dde 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -90,9 +90,12 @@ public: using TypeToVisit = const ASTFunction; CheckExpressionVisitorData(const std::vector & tables_) - : tables(tables_) + : 
joined_tables(tables_) , ands_only(true) - {} + { + for (auto & joined : joined_tables) + tables.push_back(joined.table); + } void visit(const ASTFunction & node, ASTPtr & ast) { @@ -156,7 +159,8 @@ public: } private: - const std::vector & tables; + const std::vector & joined_tables; + std::vector tables; std::map> asts_to_join_on; bool ands_only; @@ -180,31 +184,16 @@ private: /// @return table position to attach expression to or 0. size_t checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right) { - /// {best_match, best_table_pos} - std::pair left_best{0, 0}; - std::pair right_best{0, 0}; + size_t left_table_pos = 0; + bool left_match = IdentifierSemantic::chooseTable(left, tables, left_table_pos); - for (size_t i = 0; i < tables.size(); ++i) + size_t right_table_pos = 0; + bool right_match = IdentifierSemantic::chooseTable(right, tables, right_table_pos); + + if (left_match && right_match && (left_table_pos != right_table_pos)) { - size_t match = IdentifierSemantic::canReferColumnToTable(left, tables[i].table); - if (match > left_best.first) - { - left_best.first = match; - left_best.second = i; - } - - match = IdentifierSemantic::canReferColumnToTable(right, tables[i].table); - if (match > right_best.first) - { - right_best.first = match; - right_best.second = i; - } - } - - if (left_best.first && right_best.first && (left_best.second != right_best.second)) - { - size_t table_pos = std::max(left_best.second, right_best.second); - if (tables[table_pos].canAttachOnExpression()) + size_t table_pos = std::max(left_table_pos, right_table_pos); + if (joined_tables[table_pos].canAttachOnExpression()) return table_pos; } return 0; @@ -212,20 +201,10 @@ private: size_t checkIdentifier(const ASTIdentifier & identifier) { - size_t best_match = 0; size_t best_table_pos = 0; + bool match = IdentifierSemantic::chooseTable(identifier, tables, best_table_pos); - for (size_t i = 0; i < tables.size(); ++i) - { - size_t match = 
IdentifierSemantic::canReferColumnToTable(identifier, tables[i].table); - if (match > best_match) - { - best_match = match; - best_table_pos = i; - } - } - - if (best_match && tables[best_table_pos].canAttachOnExpression()) + if (match && joined_tables[best_table_pos].canAttachOnExpression()) return best_table_pos; return 0; } diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp index 8173ce3256a..daf9ca57fb9 100644 --- a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp @@ -28,17 +28,9 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &) } else { - // FIXME: make a better matcher using `names`? - size_t best_match = 0; - for (const auto & table_names : tables) - { - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table_names.first)) - if (match > best_match) - { - best_match = match; - best_table = &table_names.first; - } - } + size_t best_table_pos = 0; + if (IdentifierSemantic::chooseTable(identifier, tables, best_table_pos)) + best_table = &tables[best_table_pos].first; } identifier_table.emplace_back(&identifier, best_table); diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 6b74dc2d8d1..f5006159aa9 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -5,6 +5,37 @@ namespace DB { +namespace +{ + +const DatabaseAndTableWithAlias & extractTable(const DatabaseAndTableWithAlias & table) +{ + return table; +} + +const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & table) +{ + return table.first; +} + +template +bool tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, size_t & best_table_pos) +{ + best_table_pos = 0; + size_t best_match = 0; + for (size_t i = 0; i < tables.size(); ++i) + if (size_t match = 
IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i]))) + if (match > best_match) + { + best_match = match; + best_table_pos = i; + } + + return best_match; +} + +} + std::optional IdentifierSemantic::getColumnName(const ASTIdentifier & node) { if (!node.semantic->special) @@ -57,26 +88,16 @@ size_t IdentifierSemantic::getMembership(const ASTIdentifier & identifier) return identifier.semantic->membership; } -bool IdentifierSemantic::trySetMembership(ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos) +bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, + size_t & best_table_pos) { - best_table_pos = 0; - size_t best_match = 0; - for (size_t i = 0; i < tables.size(); ++i) - if (size_t match = canReferColumnToTable(identifier, tables[i].first)) - if (match > best_match) - { - best_match = match; - best_table_pos = i; - } + return tryChooseTable(identifier, tables, best_table_pos); +} - if (best_match) - { - setMembership(identifier, best_table_pos + 1); - return true; - } - - return false; +bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, + size_t & best_table_pos) +{ + return tryChooseTable(identifier, tables, best_table_pos); } std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 4fde404488c..7403f5d2340 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -35,7 +35,8 @@ struct IdentifierSemantic static bool canBeAlias(const ASTIdentifier & identifier); static void setMembership(ASTIdentifier & identifier, size_t table_no); static size_t getMembership(const ASTIdentifier & identifier); - static bool trySetMembership(ASTIdentifier & identifier, const std::vector & tables, size_t & best_table_pos); + static bool 
chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos); + static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos); private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index b60e6533921..f807bfb7acb 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -163,7 +163,13 @@ struct ColumnAliasesMatcher auto it = rev_aliases.find(long_name); if (it == rev_aliases.end()) { - bool last_table = IdentifierSemantic::canReferColumnToTable(*identifier, tables.back()); + bool last_table = false; + { + size_t best_table_pos = 0; + if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos)) + last_table = (best_table_pos + 1 == tables.size()); + } + if (!last_table) { String alias = hide_prefix + long_name; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index e05387b5aa0..7226ce9d4dd 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -62,7 +62,8 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (IdentifierSemantic::getColumnName(identifier)) { size_t best_table_pos = 0; - IdentifierSemantic::trySetMembership(identifier, data.tables, best_table_pos); + if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos)) + IdentifierSemantic::setMembership(identifier, best_table_pos + 1); /// In case if column from the joined table are in source columns, change it's name to qualified. 
if (best_table_pos && data.hasColumn(identifier.shortName())) From a19a699befca481311aa81f0770cf0af8a380b07 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 20:52:56 +0300 Subject: [PATCH 025/122] Fix confused variables --- dbms/src/Interpreters/ExternalLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 71e04091e3a..5a5d4cc80ad 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -191,7 +191,7 @@ private: continue; } - configs_from_file.emplace_back(name, ObjectConfig{external_name, file_contents, key}); + configs_from_file.emplace_back(external_name, ObjectConfig{name, file_contents, key}); } loadable_info.configs = std::move(configs_from_file); From 842a31be8b991b50505a99adfd018f7338d31c88 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 16 Oct 2019 20:59:10 +0300 Subject: [PATCH 026/122] More tests --- .../01018_ddl_dictionaries_select.reference | 1 + .../0_stateless/01018_ddl_dictionaries_select.sql | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index ea65974b190..eac68a5ab59 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -6,3 +6,4 @@ 11 7 11 +6 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index eb01d917594..100cc26cb3d 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -71,4 +71,17 @@ ATTACH DICTIONARY database_for_dict.dict1; SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', tuple(toUInt64(11), 
'121')); +CREATE DICTIONARY database_for_dict.dict2 +( + key_column UInt64 DEFAULT 0, + some_column String EXPRESSION toString(fourth_column), + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED()); + +SELECT dictGetString('database_for_dict.dict2', 'some_column', toUInt64(12)); + DROP DATABASE IF EXISTS database_for_dict; From 3987a45ea21d2a4b9a33d19456d1f766216505fd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 17 Oct 2019 12:05:48 +0300 Subject: [PATCH 027/122] Fix build and style --- .../Dictionaries/getDictionaryConfigurationFromAST.cpp | 6 +++++- dbms/src/Interpreters/Context.cpp | 1 - dbms/src/Interpreters/ExternalLoader.cpp | 10 +++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 9772cc17c13..09e3abc1ac2 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -111,7 +111,8 @@ void buildLayoutConfiguration( void buildRangeConfiguration(AutoPtr doc, AutoPtr root, const ASTDictionaryRange * range) { // appends value to root - auto appendElem = [&doc, &root](const std::string & key, const std::string & value) { + auto appendElem = [&doc, &root](const std::string & key, const std::string & value) + { AutoPtr element(doc->createElement(key)); AutoPtr name(doc->createElement("name")); AutoPtr text(doc->createTextNode(value)); @@ -268,6 +269,9 @@ void buildPrimaryKeyConfiguration( break; } } + if (!found) + throw Exception( + "Primary key field '" + key_name + "' not found among attributes.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); } } } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 42b3bf1d4d2..23588275c3e 100644 --- 
a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5a5d4cc80ad..70f22cb9afe 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -110,21 +110,21 @@ private: for (const auto & [name, repo_with_settings] : repositories) { const auto names = repo_with_settings.first->getAllLoadablesDefinitionNames(); - for (const auto & name : names) + for (const auto & loadable_name : names) { - auto it = loadables_infos.find(name); + auto it = loadables_infos.find(loadable_name); if (it != loadables_infos.end()) { LoadablesInfos & loadable_info = it->second; - if (readLoadablesInfo(*repo_with_settings.first, name, repo_with_settings.second, loadable_info)) + if (readLoadablesInfo(*repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) changed = true; } else { LoadablesInfos loadable_info; - if (readLoadablesInfo(*repo_with_settings.first, name, repo_with_settings.second, loadable_info)) + if (readLoadablesInfo(*repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) { - loadables_infos.emplace(name, std::move(loadable_info)); + loadables_infos.emplace(loadable_name, std::move(loadable_info)); changed = true; } } From 6dfe5c714261adecb0eea56a3bb7368216049c5c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 17 Oct 2019 16:05:12 +0300 Subject: [PATCH 028/122] Better introspection --- dbms/src/Databases/DatabaseDictionary.cpp | 22 +++--- dbms/src/Databases/DatabaseMemory.cpp | 26 +++++-- dbms/src/Databases/DatabaseMemory.h | 9 +++ dbms/src/Databases/DatabasesCommon.cpp | 56 ++++++++++++++- dbms/src/Databases/DatabasesCommon.h | 2 + dbms/src/Databases/IDatabase.h | 6 ++ .../ExternalDictionariesLoader.cpp | 2 +- dbms/src/Interpreters/ExternalLoader.cpp | 32 +++++++-- 
dbms/src/Interpreters/ExternalLoader.h | 2 + .../IExternalLoaderConfigRepository.h | 1 + .../Storages/System/StorageSystemColumns.cpp | 2 +- .../System/StorageSystemDictionaries.cpp | 69 ++----------------- .../Storages/System/StorageSystemTables.cpp | 2 +- .../01018_ddl_dictionaries_create.reference | 8 --- .../01018_ddl_dictionaries_create.sql | 33 +-------- .../01018_ddl_dictionaries_select.reference | 8 +++ .../01018_ddl_dictionaries_select.sql | 7 ++ 17 files changed, 160 insertions(+), 127 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 0e997f8d914..6e946bf0d79 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -35,25 +35,29 @@ void DatabaseDictionary::loadStoredObjects(Context &, bool) Tables DatabaseDictionary::listTables(const Context & context, const FilterByNameFunction & filter_by_name) { Tables tables; - ExternalLoader::Loadables loadables; + ExternalLoader::LoadResults load_results; if (filter_by_name) { /// If `filter_by_name` is set, we iterate through all dictionaries with such names. That's why we need to load all of them. - loadables = context.getExternalDictionariesLoader().loadAndGet(filter_by_name); + context.getExternalDictionariesLoader().load(filter_by_name, load_results); } else { /// If `filter_by_name` isn't set, we iterate through only already loaded dictionaries. We don't try to load all dictionaries in this case. 
- loadables = context.getExternalDictionariesLoader().getCurrentlyLoadedObjects(); + load_results = context.getExternalDictionariesLoader().getCurrentLoadResults(); } - for (const auto & loadable : loadables) + for (const auto & [object_name, info]: load_results) { - auto dict_ptr = std::static_pointer_cast(loadable); - auto dict_name = dict_ptr->getName(); - const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); - auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure); - tables[dict_name] = StorageDictionary::create(getDatabaseName(), dict_name, ColumnsDescription{columns}, context, true, dict_name); + /// Load tables only from XML dictionaries, don't touch other + if (info.object != nullptr && info.repository_name.empty()) + { + auto dict_ptr = std::static_pointer_cast(info.object); + auto dict_name = dict_ptr->getName(); + const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); + auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure); + tables[dict_name] = StorageDictionary::create(getDatabaseName(), dict_name, ColumnsDescription{columns}, context, true, dict_name); + } } return tables; } diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index 318709903c6..ece77c8cd88 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -10,6 +10,7 @@ namespace ErrorCodes { extern const int CANNOT_GET_CREATE_TABLE_QUERY; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY; + extern const int UNSUPPORTED_METHOD; } DatabaseMemory::DatabaseMemory(String name_) @@ -34,12 +35,17 @@ void DatabaseMemory::createTable( } +void DatabaseMemory::attachDictionary(const String & /*name*/, const Context & /*context*/, bool /*load*/) +{ + throw Exception("There is no ATTACH DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); +} + void DatabaseMemory::createDictionary( - const Context & context, - const String & 
dictionary_name, + const Context & /*context*/, + const String & /*dictionary_name*/, const ASTPtr & /*query*/) { - attachDictionary(dictionary_name, context, true); + throw Exception("There is no CREATE DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); } @@ -51,11 +57,17 @@ void DatabaseMemory::removeTable( } -void DatabaseMemory::removeDictionary( - const Context & context, - const String & dictionary_name) +void DatabaseMemory::detachDictionary(const String & /*name*/, const Context & /*context*/) { - detachDictionary(dictionary_name, context); + throw Exception("There is no DETACH DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); +} + + +void DatabaseMemory::removeDictionary( + const Context & /*context*/, + const String & /*dictionary_name*/) +{ + throw Exception("There is no DROP DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index d0ef4a47d25..a04d01d8aae 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -38,6 +38,11 @@ public: const String & dictionary_name, const ASTPtr & query) override; + void attachDictionary( + const String & name, + const Context & context, + bool load) override; + void removeTable( const Context & context, const String & table_name) override; @@ -46,6 +51,10 @@ public: const Context & context, const String & dictionary_name) override; + void detachDictionary( + const String & name, + const Context & context) override; + time_t getObjectMetadataModificationTime(const Context & context, const String & table_name) override; ASTPtr getCreateTableQuery(const Context & context, const String & table_name) const override; diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 84adba87604..2dfaebaab40 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ 
-10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -30,12 +31,30 @@ namespace ErrorCodes extern const int DICTIONARY_ALREADY_EXISTS; } +namespace +{ + +StoragePtr getDictionaryStorage(const Context & context, const String & table_name, const String & db_name) +{ + auto dict_name = db_name + "." + table_name; + auto dict_ptr = context.getExternalDictionariesLoader().tryGetDictionary(dict_name); + if (dict_ptr) + { + const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); + auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure); + return StorageDictionary::create(db_name, table_name, ColumnsDescription{columns}, context, true, dict_name); + } + return nullptr; +} + +} + bool DatabaseWithOwnTablesBase::isTableExist( const Context & /*context*/, const String & table_name) const { std::lock_guard lock(mutex); - return tables.find(table_name) != tables.end(); + return tables.find(table_name) != tables.end() || dictionaries.find(table_name) != dictionaries.end(); } bool DatabaseWithOwnTablesBase::isDictionaryExist( @@ -47,25 +66,55 @@ bool DatabaseWithOwnTablesBase::isDictionaryExist( } StoragePtr DatabaseWithOwnTablesBase::tryGetTable( - const Context & /*context*/, + const Context & context, const String & table_name) const { std::lock_guard lock(mutex); auto it = tables.find(table_name); if (it == tables.end()) + { + if (dictionaries.count(table_name)) + return getDictionaryStorage(context, table_name, getDatabaseName()); return {}; + } return it->second; } +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) +{ + std::lock_guard lock(mutex); + Tables tables_copy = tables; + if (!filter_by_table_name) + { + for (const String & dictionary_name : dictionaries) + if (auto dictionary_storage = getDictionaryStorage(context, dictionary_name, getDatabaseName()); dictionary_storage) + 
tables_copy.emplace(dictionary_name, dictionary_storage); + return std::make_unique(tables_copy); + } + + Tables filtered_tables; + for (const auto & [table_name, storage] : tables) + if (filter_by_table_name(table_name)) + filtered_tables.emplace(table_name, storage); + for (const String & dictionary_name : dictionaries) + if (filter_by_table_name(dictionary_name)) + if (auto dictionary_storage = getDictionaryStorage(context, dictionary_name, getDatabaseName()); dictionary_storage) + tables_copy.emplace(dictionary_name, dictionary_storage); + + return std::make_unique(std::move(filtered_tables)); +} + DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(mutex); if (!filter_by_table_name) return std::make_unique(tables); + Tables filtered_tables; for (const auto & [table_name, storage] : tables) if (filter_by_table_name(table_name)) filtered_tables.emplace(table_name, storage); + return std::make_unique(std::move(filtered_tables)); } @@ -94,6 +143,9 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) StoragePtr res; { std::lock_guard lock(mutex); + if (dictionaries.count(table_name)) + throw Exception("Cannot detach dictionary " + name + "." + table_name + " as table, use DETACH DICTIONARY query.", ErrorCodes::UNKNOWN_TABLE); + auto it = tables.find(table_name); if (it == tables.end()) throw Exception("Table " + name + "." 
+ table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index aec660cf945..00034dacbce 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -43,6 +43,8 @@ public: DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; + DatabaseTablesIteratorPtr getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) override; + void shutdown() override; virtual ~DatabaseWithOwnTablesBase() override; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index d3147a36991..b4985dc066b 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -129,6 +129,12 @@ public: /// Get an iterator to pass through all the dictionaries. virtual DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name = {}) = 0; + /// Get an iterator to pass through all the tables and dictionary tables. + virtual DatabaseTablesIteratorPtr getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_name = {}) + { + return getTablesIterator(context, filter_by_name); + } + /// Is the database empty. 
virtual bool empty(const Context & context) const = 0; diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 596db6b8960..930904d2ad6 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -11,7 +11,7 @@ ExternalDictionariesLoader::ExternalDictionariesLoader( : ExternalLoader("external dictionary", &Logger::get("ExternalDictionariesLoader")) , context(context_) { - addConfigRepository("_XMLConfigRepository", std::move(config_repository), {"dictionary", "name"}); + addConfigRepository("", std::move(config_repository), {"dictionary", "name"}); enableAsyncLoading(true); enablePeriodicUpdates(true); } diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 70f22cb9afe..fbf2e2cd629 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -26,6 +26,7 @@ struct ExternalLoader::ObjectConfig String config_path; Poco::AutoPtr config; String key_in_config; + String repository_name; }; @@ -107,7 +108,7 @@ private: loadable_info.in_use = false; } - for (const auto & [name, repo_with_settings] : repositories) + for (const auto & [repo_name, repo_with_settings] : repositories) { const auto names = repo_with_settings.first->getAllLoadablesDefinitionNames(); for (const auto & loadable_name : names) @@ -116,13 +117,13 @@ private: if (it != loadables_infos.end()) { LoadablesInfos & loadable_info = it->second; - if (readLoadablesInfo(*repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) + if (readLoadablesInfo(repo_name, *repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) changed = true; } else { LoadablesInfos loadable_info; - if (readLoadablesInfo(*repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) + if (readLoadablesInfo(repo_name, 
*repo_with_settings.first, loadable_name, repo_with_settings.second, loadable_info)) { loadables_infos.emplace(loadable_name, std::move(loadable_info)); changed = true; @@ -145,6 +146,7 @@ private: } bool readLoadablesInfo( + const String & repo_name, IExternalLoaderConfigRepository & repository, const String & name, const ExternalLoaderConfigSettings & settings, @@ -191,7 +193,7 @@ private: continue; } - configs_from_file.emplace_back(external_name, ObjectConfig{name, file_contents, key}); + configs_from_file.emplace_back(external_name, ObjectConfig{name, file_contents, key, repo_name}); } loadable_info.configs = std::move(configs_from_file); @@ -441,8 +443,17 @@ public: loaded_objects = collectLoadedObjects(filter_by_name); } + /// Tries to finish loading of the objects for which the specified function returns true. + void load(const FilterByNameFunction & filter_by_name, LoadResults & loaded_results, Duration timeout = NO_TIMEOUT) + { + std::unique_lock lock{mutex}; + loadImpl(filter_by_name, timeout, lock); + loaded_results = collectLoadResults(filter_by_name); + } + /// Tries to finish loading of all the objects during the timeout. void load(Loadables & loaded_objects, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_objects, timeout); } + void load(LoadResults & loaded_results, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_results, timeout); } /// Starts reloading a specified object. 
void reload(const String & name, bool load_never_loading = false) @@ -581,6 +592,7 @@ private: result.loading_start_time = loading_start_time; result.loading_duration = loadingDuration(); result.origin = config.config_path; + result.repository_name = config.repository_name; return result; } @@ -627,8 +639,10 @@ private: LoadResults load_results; load_results.reserve(infos.size()); for (const auto & [name, info] : infos) + { if (filter_by_name(name)) load_results.emplace_back(name, info.loadResult()); + } return load_results; } @@ -1052,6 +1066,16 @@ void ExternalLoader::load(const FilterByNameFunction & filter_by_name, Loadables loading_dispatcher->load(loaded_objects, timeout); } + +void ExternalLoader::load(const FilterByNameFunction & filter_by_name, LoadResults & loaded_objects, Duration timeout) const +{ + if (filter_by_name) + loading_dispatcher->load(filter_by_name, loaded_objects, timeout); + else + loading_dispatcher->load(loaded_objects, timeout); +} + + void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const { return loading_dispatcher->load(loaded_objects, timeout); diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index cb86a614958..71b18d9edd6 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -72,6 +72,7 @@ public: TimePoint loading_start_time; Duration loading_duration; std::exception_ptr exception; + std::string repository_name; }; using LoadResults = std::vector>; @@ -137,6 +138,7 @@ public: /// Tries to finish loading of the objects for which the specified function returns true. 
void load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout = NO_TIMEOUT) const; + void load(const FilterByNameFunction & filter_by_name, LoadResults & load_results, Duration timeout = NO_TIMEOUT) const; Loadables loadAndGet(const FilterByNameFunction & filter_by_name, Duration timeout = NO_TIMEOUT) const { Loadables loaded_objects; load(filter_by_name, loaded_objects, timeout); return loaded_objects; } /// Tries to finish loading of all the objects during the timeout. diff --git a/dbms/src/Interpreters/IExternalLoaderConfigRepository.h b/dbms/src/Interpreters/IExternalLoaderConfigRepository.h index 93cefe0a0d4..efb91815555 100644 --- a/dbms/src/Interpreters/IExternalLoaderConfigRepository.h +++ b/dbms/src/Interpreters/IExternalLoaderConfigRepository.h @@ -13,6 +13,7 @@ namespace DB using LoadablesConfigurationPtr = Poco::AutoPtr; + /// Base interface for configurations source for Loadble objects, which can be /// loaded with ExternalLoader. Configurations may came from filesystem (XML-files), /// server memory (from database), etc. It's important that main result of this class diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index c42e7ec586d..31c7b1c45c3 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -292,7 +292,7 @@ BlockInputStreams StorageSystemColumns::read( const DatabasePtr database = databases.at(database_name); offsets[i] = i ? 
offsets[i - 1] : 0; - for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesWithDictionaryTablesIterator(context); iterator->isValid(); iterator->next()) { const String & table_name = iterator->name(); storages.emplace(std::piecewise_construct, diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 5674c9954bf..c31d514cf08 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -19,44 +19,6 @@ namespace DB { -namespace -{ - -NameSet getFilteredDatabases(const ASTPtr & query, const Context & context) -{ - MutableColumnPtr column = ColumnString::create(); - for (const auto & db : context.getDatabases()) - column->insert(db.first); - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; - VirtualColumnUtils::filterBlockWithQuery(query, block, context); - NameSet result; - for (size_t i = 0; i < block.rows(); ++i) - result.insert(block.getByPosition(0).column->getDataAt(i).toString()); - return result; -} - - -NameSet getFilteredDictionaries(const ASTPtr & query, const Context & context, const DatabasePtr & database) -{ - MutableColumnPtr column = ColumnString::create(); - auto dicts_it = database->getDictionariesIterator(context); - while (dicts_it->isValid()) - { - column->insert(dicts_it->name()); - dicts_it->next(); - } - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "dictionary")}; - VirtualColumnUtils::filterBlockWithQuery(query, block, context); - NameSet result; - for (size_t i = 0; i < block.rows(); ++i) - result.insert(block.getByPosition(0).column->getDataAt(i).toString()); - return result; -} - -} - NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() { return { @@ -81,15 +43,19 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() }; } 
-void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const +void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & /*query_info*/) const { const auto & external_dictionaries = context.getExternalDictionariesLoader(); for (const auto & [dict_name, load_result] : external_dictionaries.getCurrentLoadResults()) { size_t i = 0; - res_columns[i++]->insert(""); - res_columns[i++]->insert(dict_name); + res_columns[i++]->insert(load_result.repository_name); + if (!load_result.repository_name.empty()) + res_columns[i++]->insert(dict_name.substr(load_result.repository_name.length() + 1)); + else + res_columns[i++]->insert(dict_name); + res_columns[i++]->insert(static_cast(load_result.status)); res_columns[i++]->insert(load_result.origin); @@ -128,27 +94,6 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con else res_columns[i++]->insertDefault(); } - - /// Temporary code for testing TODO(alesapin) - NameSet databases = getFilteredDatabases(query_info.query, context); - for (auto database : databases) - { - DatabasePtr database_ptr = context.getDatabase(database); - auto dictionaries_set = getFilteredDictionaries(query_info.query, context, database_ptr); - auto filter = [&dictionaries_set](const String & dict_name) { return dictionaries_set.count(dict_name); }; - auto dictionaries_it = database_ptr->getDictionariesIterator(context, filter); - while (dictionaries_it->isValid()) - { - size_t i = 0; - res_columns[i++]->insert(database); - res_columns[i++]->insert(dictionaries_it->name()); - for (size_t j = 0; j < getNamesAndTypes().size() - 2; ++j) - res_columns[i++]->insertDefault(); - - dictionaries_it->next(); - } - - } } } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index d73c72a927d..01f8704f681 100644 --- 
a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -193,7 +193,7 @@ protected: } if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(context); + tables_it = database->getTablesWithDictionaryTablesIterator(context); const bool need_lock_structure = needLockStructure(database, header); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index ecf1730ca1c..a8ee6b67912 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -12,14 +12,6 @@ ordinary_db dict1 ==DROP DICTIONARY 0 =DICTIONARY in Memory DB -dict2 -1 -memory_db dict2 -==DETACH DICTIONARY -0 -==ATTACH DICTIONARY -0 -==DROP DICTIONARY 0 =DICTIONARY in Dictionary DB =DICTIONARY in Lazy DB diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index f5499f21bfe..ce8e5c2cc2c 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -89,7 +89,7 @@ CREATE DICTIONARY memory_db.dict2 PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) -LAYOUT(FLAT()); +LAYOUT(FLAT()); -- {serverError 1} SHOW CREATE DICTIONARY memory_db.dict2; -- {serverError 485} @@ -99,37 +99,6 @@ EXISTS DICTIONARY memory_db.dict2; SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; -SELECT '==DETACH DICTIONARY'; -DETACH DICTIONARY memory_db.dict2; - -SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; - -EXISTS DICTIONARY memory_db.dict2; - -SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; - -SELECT '==ATTACH DICTIONARY'; 
- -ATTACH DICTIONARY memory_db.dict2; --{serverError 485} - -SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; - -EXISTS DICTIONARY memory_db.dict2; - -SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; - -SELECT '==DROP DICTIONARY'; - -DROP DICTIONARY IF EXISTS memory_db.dict2; - -SHOW DICTIONARIES FROM memory_db LIKE 'dict2'; - -EXISTS DICTIONARY memory_db.dict2; - -SELECT database, name FROM system.dictionaries WHERE name LIKE 'dict2'; - -DROP DATABASE IF EXISTS memory_db; - DROP DATABASE IF EXISTS dictionary_db; CREATE DATABASE dictionary_db ENGINE = Dictionary; diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index eac68a5ab59..5afa7f27963 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -1,5 +1,8 @@ 11 +11 144 +144 +7 7 17 11 @@ -7,3 +10,8 @@ 7 11 6 +dict1 Dictionary +dict2 Dictionary +table_for_dict MergeTree +database_for_dict dict1 ComplexKeyCache +database_for_dict dict2 Hashed diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 100cc26cb3d..b0b3ff1fdfb 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -31,8 +31,11 @@ LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); +SELECT second_column FROM database_for_dict.dict1 WHERE key_column = 11; SELECT dictGetString('database_for_dict.dict1', 'third_column', toUInt64(12)); +SELECT third_column FROM database_for_dict.dict1 WHERE key_column = 12; SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', toUInt64(14)); +SELECT fourth_column FROM database_for_dict.dict1 WHERE key_column = 14; select 
count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); @@ -84,4 +87,8 @@ LAYOUT(HASHED()); SELECT dictGetString('database_for_dict.dict2', 'some_column', toUInt64(12)); +SELECT name, engine FROM system.tables WHERE database = 'database_for_dict' ORDER BY name; + +SELECT database, name, type FROM system.dictionaries WHERE database = 'database_for_dict' ORDER BY name; + DROP DATABASE IF EXISTS database_for_dict; From 22429a699e74f5f3cccad9014f372fd88643e904 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Thu, 17 Oct 2019 16:08:04 +0300 Subject: [PATCH 029/122] unexpected behaviour for avg over empty decimal column without group by --- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 185dbc38c51..840075511ea 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -27,12 +27,8 @@ struct AggregateFunctionAvgData template ResultT NO_SANITIZE_UNDEFINED result() const { - if constexpr (std::is_floating_point_v) - if constexpr (std::numeric_limits::is_iec559) - return static_cast(sum) / count; /// allow division by zero - - if (!count) - throw Exception("AggregateFunctionAvg with zero values", ErrorCodes::LOGICAL_ERROR); + if (count == 0) + return 0; return static_cast(sum) / count; } }; From 05392fd8826f4357b06e63ae2041cace9959584a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 17 Oct 2019 20:18:54 +0300 Subject: [PATCH 030/122] Fix deadlock, add test and refactoring --- dbms/src/Databases/DatabaseOrdinary.cpp | 5 +- dbms/src/Databases/DatabasesCommon.cpp | 53 ++++---- dbms/src/Interpreters/Context.cpp | 11 +- .../ExternalDictionariesLoader.cpp | 7 +- .../Interpreters/ExternalDictionariesLoader.h | 5 + dbms/src/Interpreters/ExternalLoader.cpp | 22 ++-- 
.../Interpreters/InterpreterCreateQuery.cpp | 2 +- ...ictionaries_concurrent_requrests.reference | 1 + ...8_ddl_dictionaries_concurrent_requrests.sh | 122 ++++++++++++++++++ .../01018_ddl_dictionaries_select.sql | 2 +- 10 files changed, 187 insertions(+), 43 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.reference create mode 100755 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 52fd477ef0e..519febbab8a 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -184,10 +184,11 @@ void DatabaseOrdinary::loadStoredObjects( /// After all tables was basically initialized, startup them. startupTables(pool); + /// Add database as repository auto dictionaries_repository = std::make_unique(shared_from_this(), context); - context.getExternalDictionariesLoader().addConfigRepository( - getDatabaseName(), std::move(dictionaries_repository), {"dictionary", "name"}); + auto & external_loader = context.getExternalDictionariesLoader(); + external_loader.addConfigRepository(getDatabaseName(), std::move(dictionaries_repository)); } diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 2dfaebaab40..19815428730 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -37,7 +37,8 @@ namespace StoragePtr getDictionaryStorage(const Context & context, const String & table_name, const String & db_name) { auto dict_name = db_name + "." 
+ table_name; - auto dict_ptr = context.getExternalDictionariesLoader().tryGetDictionary(dict_name); + const auto & external_loader = context.getExternalDictionariesLoader(); + auto dict_ptr = external_loader.tryGetDictionary(dict_name); if (dict_ptr) { const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); @@ -69,39 +70,41 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable( const Context & context, const String & table_name) const { - std::lock_guard lock(mutex); - auto it = tables.find(table_name); - if (it == tables.end()) { - if (dictionaries.count(table_name)) - return getDictionaryStorage(context, table_name, getDatabaseName()); - return {}; + std::lock_guard lock(mutex); + auto it = tables.find(table_name); + if (it != tables.end()) + return it->second; } - return it->second; + + if (isDictionaryExist(context, table_name)) + /// We don't need lock database here, because database doesn't store dictionary itself + /// just metadata + return getDictionaryStorage(context, table_name, getDatabaseName()); + + return {}; } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_name) { - std::lock_guard lock(mutex); - Tables tables_copy = tables; - if (!filter_by_table_name) + auto tables_it = getTablesIterator(context, filter_by_name); + auto dictionaries_it = getDictionariesIterator(context, filter_by_name); + + Tables result; + while (tables_it && tables_it->isValid()) { - for (const String & dictionary_name : dictionaries) - if (auto dictionary_storage = getDictionaryStorage(context, dictionary_name, getDatabaseName()); dictionary_storage) - tables_copy.emplace(dictionary_name, dictionary_storage); - return std::make_unique(tables_copy); + result.emplace(tables_it->name(), 
tables_it->table()); + tables_it->next(); } - Tables filtered_tables; - for (const auto & [table_name, storage] : tables) - if (filter_by_table_name(table_name)) - filtered_tables.emplace(table_name, storage); - for (const String & dictionary_name : dictionaries) - if (filter_by_table_name(dictionary_name)) - if (auto dictionary_storage = getDictionaryStorage(context, dictionary_name, getDatabaseName()); dictionary_storage) - tables_copy.emplace(dictionary_name, dictionary_storage); + while (dictionaries_it && dictionaries_it->isValid()) + { + auto table_name = dictionaries_it->name(); + result.emplace(table_name, getDictionaryStorage(context, table_name, getDatabaseName())); + dictionaries_it->next(); + } - return std::make_unique(std::move(filtered_tables)); + return std::make_unique(result); } DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const Context & /*context*/, const FilterByNameFunction & filter_by_table_name) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 23588275c3e..2b3cb267557 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1041,10 +1041,10 @@ void Context::addDatabase(const String & database_name, const DatabasePtr & data DatabasePtr Context::detachDatabase(const String & database_name) { auto lock = getLock(); - auto res = getDatabase(database_name); getExternalDictionariesLoader().removeConfigRepository(database_name); shared->databases.erase(database_name); + return res; } @@ -1342,13 +1342,20 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader() const ExternalModelsLoader & Context::getExternalModelsLoader() const { + { + std::lock_guard lock(shared->external_models_mutex); + if (shared->external_models_loader) + return *shared->external_models_loader; + } + + const auto & config = getConfigRef(); std::lock_guard lock(shared->external_models_mutex); if (!shared->external_models_loader) { if (!this->global_context) throw 
Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); - auto config_repository = std::make_unique(getConfigRef(), "models_config"); + auto config_repository = std::make_unique(config, "models_config"); shared->external_models_loader.emplace(std::move(config_repository), *this->global_context); } return *shared->external_models_loader; diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 930904d2ad6..24622e43b18 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -11,7 +11,7 @@ ExternalDictionariesLoader::ExternalDictionariesLoader( : ExternalLoader("external dictionary", &Logger::get("ExternalDictionariesLoader")) , context(context_) { - addConfigRepository("", std::move(config_repository), {"dictionary", "name"}); + addConfigRepository("", std::move(config_repository)); enableAsyncLoading(true); enablePeriodicUpdates(true); } @@ -23,4 +23,9 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( return DictionaryFactory::instance().create(name, config, key_in_config, context); } +void ExternalDictionariesLoader::addConfigRepository( + const std::string & repository_name, std::unique_ptr config_repository) +{ + ExternalLoader::addConfigRepository(repository_name, std::move(config_repository), {"dictionary", "name"}); +} } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h index 04916f90d9d..413eac72931 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.h +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h @@ -33,6 +33,11 @@ public: return std::static_pointer_cast(tryGetLoadable(name)); } + void addConfigRepository( + const std::string & repository_name, + std::unique_ptr config_repository); + + protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, 
const std::string & key_in_config) const override; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index fbf2e2cd629..46e8535a694 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -67,7 +67,7 @@ public: // Generate new result. auto new_configs = std::make_shared>(); - for (const auto & [path, loadable_info] : loadables_infos) + for (const auto & [path, loadable_info] : loadables_infos) { for (const auto & [name, config] : loadable_info.configs) { @@ -148,19 +148,19 @@ private: bool readLoadablesInfo( const String & repo_name, IExternalLoaderConfigRepository & repository, - const String & name, + const String & object_name, const ExternalLoaderConfigSettings & settings, LoadablesInfos & loadable_info) const { try { - if (name.empty() || !repository.exists(name)) + if (object_name.empty() || !repository.exists(object_name)) { - LOG_WARNING(log, "Config file '" + name + "' does not exist"); + LOG_WARNING(log, "Config file '" + object_name + "' does not exist"); return false; } - auto update_time_from_repository = repository.getUpdateTime(name); + auto update_time_from_repository = repository.getUpdateTime(object_name); /// Actually it can't be less, but for sure we check less or equal if (update_time_from_repository <= loadable_info.last_update_time) @@ -169,31 +169,31 @@ private: return false; } - auto file_contents = repository.load(name); + auto file_contents = repository.load(object_name); /// get all objects' definitions Poco::Util::AbstractConfiguration::Keys keys; file_contents->keys(keys); - /// for each object defined in xml config + /// for each object defined in repositories std::vector> configs_from_file; for (const auto & key : keys) { if (!startsWith(key, settings.external_config)) { if (!startsWith(key, "comment") && !startsWith(key, "include_from")) - LOG_WARNING(log, name << ": file contains unknown node '" << key << "', expected '" << 
settings.external_config << "'"); + LOG_WARNING(log, object_name << ": file contains unknown node '" << key << "', expected '" << settings.external_config << "'"); continue; } String external_name = file_contents->getString(key + "." + settings.external_name); if (external_name.empty()) { - LOG_WARNING(log, name << ": node '" << key << "' defines " << type_name << " with an empty name. It's not allowed"); + LOG_WARNING(log, object_name << ": node '" << key << "' defines " << type_name << " with an empty name. It's not allowed"); continue; } - configs_from_file.emplace_back(external_name, ObjectConfig{name, file_contents, key, repo_name}); + configs_from_file.emplace_back(external_name, ObjectConfig{object_name, file_contents, key, repo_name}); } loadable_info.configs = std::move(configs_from_file); @@ -203,7 +203,7 @@ private: } catch (...) { - tryLogCurrentException(log, "Failed to load config for dictionary '" + name + "'"); + tryLogCurrentException(log, "Failed to load config for dictionary '" + object_name + "'"); return false; } } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index fc884beff07..d21328f8e31 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -778,7 +778,7 @@ void InterpreterCreateQuery::checkAccess(const ASTCreateQuery & create) return; if (readonly) - throw Exception("Cannot create table in readonly mode", ErrorCodes::READONLY); + throw Exception("Cannot create table or dictionary in readonly mode", ErrorCodes::READONLY); throw Exception("Cannot create table. 
DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); } diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.reference new file mode 100644 index 00000000000..7193c3d3f3d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.reference @@ -0,0 +1 @@ +Still alive diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh new file mode 100755 index 00000000000..cc7d52ce1ab --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT -n -q " + DROP DATABASE IF EXISTS database_for_dict; + DROP TABLE IF EXISTS table_for_dict1; + DROP TABLE IF EXISTS table_for_dict2; + + CREATE TABLE table_for_dict1 (key_column UInt64, value_column String) ENGINE = MergeTree ORDER BY key_column; + CREATE TABLE table_for_dict2 (key_column UInt64, value_column String) ENGINE = MergeTree ORDER BY key_column; + + INSERT INTO table_for_dict1 SELECT number, toString(number) from numbers(1000); + INSERT INTO table_for_dict2 SELECT number, toString(number) from numbers(1000, 1000); + + CREATE DATABASE database_for_dict; + + CREATE DICTIONARY database_for_dict.dict1 (key_column UInt64, value_column String) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) LIFETIME(MIN 1 MAX 5) LAYOUT(FLAT()); + + CREATE DICTIONARY database_for_dict.dict2 (key_column UInt64, value_column String) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict2' PASSWORD '' DB 
'$CLICKHOUSE_DATABASE')) LIFETIME(MIN 1 MAX 5) LAYOUT(CACHE(SIZE_IN_CELLS 150)); +" + + +function thread1() +{ + while true; do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.dictionaries FORMAT Null"; done +} + +function thread2() +{ + while true; do CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict1" ||: ; done +} + +function thread3() +{ + while true; do CLICKHOUSE_CLIENT --query "ATTACH DICTIONARY database_for_dict.dict2" ||:; done +} + + +function thread4() +{ + while true; do $CLICKHOUSE_CLIENT -n -q " + SELECT * FROM database_for_dict.dict1 FORMAT Null; + SELECT * FROM database_for_dict.dict2 FORMAT Null; + " ||: ; done +} + +function thread5() +{ + while true; do $CLICKHOUSE_CLIENT -n -q " + SELECT dictGetString('database_for_dict.dict1', 'value_column', toUInt64(number)) from numbers(1000) FROM FORMAT Null; + SELECT dictGetString('database_for_dict.dict2', 'value_column', toUInt64(number)) from numbers(1000) FROM FORMAT Null; + " ||: ; done +} + +function thread6() +{ + while true; do $CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict1"; done +} + +function thread7() +{ + while true; do $CLICKHOUSE_CLIENT -q "DETACH DICTIONARY database_for_dict.dict2"; done +} + + +export -f thread1; +export -f thread2; +export -f thread3; +export -f thread4; +export -f thread5; +export -f thread6; +export -f thread7; + +TIMEOUT=10 + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2> /dev/null & +timeout $TIMEOUT bash -c thread7 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c 
thread6 2> /dev/null & +timeout $TIMEOUT bash -c thread7 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2> /dev/null & +timeout $TIMEOUT bash -c thread7 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2> /dev/null & +timeout $TIMEOUT bash -c thread7 2> /dev/null & + +wait +$CLICKHOUSE_CLIENT -q "SELECT 'Still alive'" + +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict1" +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict2" + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE table_for_dict1; + DROP TABLE table_for_dict2; + DROP DATABASE database_for_dict; +" diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index b0b3ff1fdfb..1b9741bd720 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -37,7 +37,7 @@ SELECT third_column FROM database_for_dict.dict1 WHERE key_column = 12; SELECT dictGetFloat64('database_for_dict.dict1', 'fourth_column', toUInt64(14)); SELECT fourth_column FROM database_for_dict.dict1 WHERE key_column = 14; -select count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); +SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); DETACH DICTIONARY database_for_dict.dict1; From c29b39002db91ff66288b29d864aea073f4fb41b Mon Sep 17 00:00:00 
2001 From: alesapin Date: Thu, 17 Oct 2019 20:36:53 +0300 Subject: [PATCH 031/122] Fix race condition, add test --- dbms/src/Interpreters/ExternalLoader.cpp | 8 +- ...8_dictionaries_from_dictionaries.reference | 8 ++ .../01018_dictionaries_from_dictionaries.sql | 98 +++++++++++++++++++ 3 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference create mode 100644 dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 46e8535a694..9acd42d15f4 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -770,6 +770,10 @@ private: if (!new_object && !new_exception) throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + /// Lock the mutex again to store the changes. + if (async) + lock.lock(); + /// Calculate a new update time. TimePoint next_update_time; try @@ -778,6 +782,7 @@ private: ++error_count; else error_count = 0; + next_update_time = calculateNextUpdateTime(new_object, error_count); } catch (...) @@ -786,9 +791,6 @@ private: next_update_time = TimePoint::max(); } - /// Lock the mutex again to store the changes. - if (async) - lock.lock(); info = getInfo(name); /// And again we should check if this is still the same loading as we were doing. 
diff --git a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference new file mode 100644 index 00000000000..7d275171930 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference @@ -0,0 +1,8 @@ +100 +100 +200 +200 +200 +300 +300 +300 diff --git a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql new file mode 100644 index 00000000000..43bff036a32 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql @@ -0,0 +1,98 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +CREATE TABLE database_for_dict.table_for_dict +( + key_column UInt64, + second_column UInt8, + third_column String, + fourth_column Float64 +) +ENGINE = MergeTree() +ORDER BY key_column; + +INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100); + +DROP DICTIONARY IF EXISTS database_for_dict.dict1; + +CREATE DICTIONARY database_for_dict.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SELECT count(*) from database_for_dict.dict1; + +CREATE DICTIONARY database_for_dict.dict2 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict1' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED()); + +SELECT count(*) 
FROM database_for_dict.dict2; + +INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(100, 100); + +SYSTEM RELOAD DICTIONARIES; + +SELECT count(*) from database_for_dict.dict2; +SELECT count(*) from database_for_dict.dict1; + +CREATE DICTIONARY database_for_dict.dict3 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict2' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED()); + + +SELECT count(*) FROM database_for_dict.dict3; + +INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(200, 100); + +SYSTEM RELOAD DICTIONARIES; + +SELECT count(*) from database_for_dict.dict3; +SELECT count(*) from database_for_dict.dict2; +SELECT count(*) from database_for_dict.dict1; + + +CREATE DICTIONARY database_for_dict.dict4 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'non_existing_table' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED()); + +SELECT count(*) FROM database_for_dict.dict4; -- {serverError 60} + +DROP DATABASE IF EXISTS database_for_dict; + +SELECT count(*) from database_for_dict.dict3; --{serverError 81} +SELECT count(*) from database_for_dict.dict2; --{serverError 81} +SELECT count(*) from database_for_dict.dict1; --{serverError 81} From b222ec1209264dae89765f41d2d2613edd45570a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 17 Oct 2019 20:53:08 +0300 Subject: [PATCH 032/122] Fix segmentation fault --- dbms/src/Databases/DatabasesCommon.cpp | 4 +++- .../01018_dictionaries_from_dictionaries.reference | 8 ++++++++ 
.../0_stateless/01018_dictionaries_from_dictionaries.sql | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 19815428730..e1ee1045657 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -100,7 +100,9 @@ DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesWithDictionaryTabl while (dictionaries_it && dictionaries_it->isValid()) { auto table_name = dictionaries_it->name(); - result.emplace(table_name, getDictionaryStorage(context, table_name, getDatabaseName())); + auto table_ptr = getDictionaryStorage(context, table_name, getDatabaseName()); + if (table_ptr) + result.emplace(table_name, table_ptr); dictionaries_it->next(); } diff --git a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference index 7d275171930..87dc6a5b6bf 100644 --- a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference +++ b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.reference @@ -6,3 +6,11 @@ 300 300 300 +dict1 +dict2 +dict3 +table_for_dict +dict1 +dict2 +dict3 +dict4 diff --git a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql index 43bff036a32..029dc9755d8 100644 --- a/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql +++ b/dbms/tests/queries/0_stateless/01018_dictionaries_from_dictionaries.sql @@ -65,7 +65,6 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict2' DB 'da LIFETIME(MIN 1 MAX 10) LAYOUT(HASHED()); - SELECT count(*) FROM database_for_dict.dict3; INSERT INTO database_for_dict.table_for_dict SELECT number, number % 17, toString(number * number), number / 2.0 from numbers(200, 100); @@ -91,6 +90,9 @@ LAYOUT(HASHED()); SELECT count(*) 
FROM database_for_dict.dict4; -- {serverError 60} +SELECT name from system.tables WHERE database = 'database_for_dict' ORDER BY name; +SELECT name from system.dictionaries WHERE database = 'database_for_dict' ORDER BY name; + DROP DATABASE IF EXISTS database_for_dict; SELECT count(*) from database_for_dict.dict3; --{serverError 81} From 0f6176cc2d5fd9b3f4475077095eeb12794fd547 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 18 Oct 2019 00:08:28 +0300 Subject: [PATCH 033/122] throw on ambiguous qualified column --- dbms/src/Interpreters/IdentifierSemantic.cpp | 77 ++++++++++++++----- dbms/src/Interpreters/IdentifierSemantic.h | 23 +++++- .../JoinToSubqueryTransformVisitor.cpp | 2 +- .../TranslateQualifiedNamesVisitor.cpp | 3 +- dbms/src/Parsers/ASTIdentifier.cpp | 12 ++- .../0_stateless/00826_cross_to_inner_join.sql | 4 +- 6 files changed, 95 insertions(+), 26 deletions(-) diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index f5006159aa9..39ad58c3bc7 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int AMBIGUOUS_COLUMN_NAME; +} + namespace { @@ -19,18 +24,37 @@ const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & tabl } template -bool tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, size_t & best_table_pos) +IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, + size_t & best_table_pos, bool allow_ambiguous) { + using ColumnMatch = IdentifierSemantic::ColumnMatch; + best_table_pos = 0; - size_t best_match = 0; + auto best_match = ColumnMatch::NoMatch; + size_t same_match = 0; + for (size_t i = 0; i < tables.size(); ++i) - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i]))) - if (match > best_match) + { + auto match = 
IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i])); + if (value(match)) + { + if (value(match) > value(best_match)) { best_match = match; best_table_pos = i; + same_match = 0; } + else if (match == best_match) + ++same_match; + } + } + if (value(best_match) && same_match) + { + if (!allow_ambiguous) + throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + return ColumnMatch::Ambiguous; + } return best_match; } @@ -89,15 +113,15 @@ size_t IdentifierSemantic::getMembership(const ASTIdentifier & identifier) } bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos) + size_t & best_table_pos, bool ambiguous) { - return tryChooseTable(identifier, tables, best_table_pos); + return value(tryChooseTable(identifier, tables, best_table_pos, ambiguous)); } bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - size_t & best_table_pos) + size_t & best_table_pos, bool ambiguous) { - return tryChooseTable(identifier, tables, best_table_pos); + return value(tryChooseTable(identifier, tables, best_table_pos, ambiguous)); } std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) @@ -127,18 +151,22 @@ bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier return false; } -size_t IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) +IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, + const DatabaseAndTableWithAlias & db_and_table) { /// database.table.column if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table)) - return 2; + return ColumnMatch::DatabaseAndTable; - /// table.column or alias.column. 
- if (doesIdentifierBelongTo(identifier, db_and_table.table) || - doesIdentifierBelongTo(identifier, db_and_table.alias)) - return 1; + /// alias.column + if (doesIdentifierBelongTo(identifier, db_and_table.alias)) + return ColumnMatch::TableAlias; - return 0; + /// table.column + if (doesIdentifierBelongTo(identifier, db_and_table.table)) + return ColumnMatch::TableName; + + return ColumnMatch::NoMatch; } /// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. @@ -162,10 +190,23 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) { - size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); + auto match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); + size_t to_strip = 0; + switch (match) + { + case ColumnMatch::TableName: + case ColumnMatch::TableAlias: + to_strip = 1; + break; + case ColumnMatch::DatabaseAndTable: + to_strip = 2; + break; + default: + break; + } - setColumnShortName(identifier, match); - if (match) + setColumnShortName(identifier, to_strip); + if (value(match)) identifier.semantic->can_be_alias = false; if (identifier.semantic->need_long_name) diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 7403f5d2340..8a48227d6fe 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -17,6 +17,16 @@ struct IdentifierSemanticImpl /// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier struct IdentifierSemantic { + enum class ColumnMatch + { + NoMatch, + ColumnName, /// table has column with same name + TableName, /// column qualified with table name + DatabaseAndTable, /// column qualified with database and table name + TableAlias, /// column qualified with table alias + Ambiguous, + }; + /// 
@returns name for column identifiers static std::optional getColumnName(const ASTIdentifier & node); static std::optional getColumnName(const ASTPtr & ast); @@ -26,7 +36,7 @@ struct IdentifierSemantic static std::optional getTableName(const ASTPtr & ast); static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier); - static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static String columnLongName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); @@ -35,8 +45,10 @@ struct IdentifierSemantic static bool canBeAlias(const ASTIdentifier & identifier); static void setMembership(ASTIdentifier & identifier, size_t table_no); static size_t getMembership(const ASTIdentifier & identifier); - static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos); - static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos); + static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, + bool ambiguous = false); + static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, + bool ambiguous = false); private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); @@ -44,4 +56,9 @@ private: static void setColumnShortName(ASTIdentifier & identifier, size_t match); }; +inline UInt32 value(IdentifierSemantic::ColumnMatch match) +{ + return static_cast(match); +} + } diff --git 
a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index f807bfb7acb..b7bf7c9b983 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -226,7 +226,7 @@ struct ColumnAliasesMatcher String long_name; for (auto & table : data.tables) { - if (IdentifierSemantic::canReferColumnToTable(node, table)) + if (value(IdentifierSemantic::canReferColumnToTable(node, table))) { if (!long_name.empty()) throw Exception("Cannot refer column '" + node.name + "' to one table", ErrorCodes::AMBIGUOUS_COLUMN_NAME); diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 7226ce9d4dd..204fe454052 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -62,7 +62,8 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (IdentifierSemantic::getColumnName(identifier)) { size_t best_table_pos = 0; - if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos)) + bool allow_ambiguous = data.join_using_columns.count(identifier.shortName()); + if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos, allow_ambiguous)) IdentifierSemantic::setMembership(identifier, best_table_pos + 1); /// In case if column from the joined table are in source columns, change it's name to qualified. 
diff --git a/dbms/src/Parsers/ASTIdentifier.cpp b/dbms/src/Parsers/ASTIdentifier.cpp index e3948f99f5b..6307db675fa 100644 --- a/dbms/src/Parsers/ASTIdentifier.cpp +++ b/dbms/src/Parsers/ASTIdentifier.cpp @@ -34,10 +34,20 @@ ASTIdentifier::ASTIdentifier(const String & name_, std::vector && name_p , name_parts(name_parts_) , semantic(std::make_shared()) { + if (name_parts.size() && name_parts[0] == "") + name_parts.erase(name_parts.begin()); + + if (name == "") + { + if (name_parts.size() == 2) + name = name_parts[0] + '.' + name_parts[1]; + else if (name_parts.size() == 1) + name = name_parts[0]; + } } ASTIdentifier::ASTIdentifier(std::vector && name_parts_) - : ASTIdentifier(name_parts_.at(0) + '.' + name_parts_.at(1), std::move(name_parts_)) + : ASTIdentifier("", std::move(name_parts_)) {} void ASTIdentifier::setShortName(const String & new_name) diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql index fa16cf398da..035662a0a0d 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -1,9 +1,9 @@ SET enable_debug_queries = 1; set allow_experimental_cross_to_join_conversion = 0; -select * from system.one cross join system.one; +select * from system.one l cross join system.one r; set allow_experimental_cross_to_join_conversion = 1; -select * from system.one cross join system.one; +select * from system.one l cross join system.one r; DROP TABLE IF EXISTS t1_00826; DROP TABLE IF EXISTS t2_00826; From cdc195727ec9226e710e3c408113ee1d8e3b72b4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 18 Oct 2019 18:44:32 +0300 Subject: [PATCH 034/122] Fix some races from lazy load and add ugly interface to loader (need to be refactored) --- dbms/programs/server/Server.cpp | 4 + dbms/src/Databases/DatabaseOnDisk.cpp | 8 +- dbms/src/Interpreters/Context.cpp | 4 +- .../ExternalDictionariesLoader.cpp | 20 ++- 
.../Interpreters/ExternalDictionariesLoader.h | 14 +- dbms/src/Interpreters/ExternalLoader.cpp | 107 +++++++++--- dbms/src/Interpreters/ExternalLoader.h | 14 +- .../Interpreters/InterpreterCreateQuery.cpp | 11 +- .../test_dictionaries_ddl/__init__.py | 0 .../test_dictionaries_ddl/configs/config.xml | 19 ++ .../configs/dictionaries/lazy_load.xml | 4 + .../dictionaries/simple_dictionary.xml | 41 +++++ .../test_dictionaries_ddl/configs/users.xml | 36 ++++ .../integration/test_dictionaries_ddl/test.py | 163 ++++++++++++++++++ 14 files changed, 409 insertions(+), 36 deletions(-) create mode 100644 dbms/tests/integration/test_dictionaries_ddl/__init__.py create mode 100644 dbms/tests/integration/test_dictionaries_ddl/configs/config.xml create mode 100644 dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/lazy_load.xml create mode 100644 dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/simple_dictionary.xml create mode 100644 dbms/tests/integration/test_dictionaries_ddl/configs/users.xml create mode 100644 dbms/tests/integration/test_dictionaries_ddl/test.py diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 5385057fd03..a9a088f80dd 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -920,6 +921,9 @@ int Server::main(const std::vector & /*args*/) global_context->tryCreateEmbeddedDictionaries(); global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true); } + + auto config_repository = std::make_unique(config(), "dictionaries_config"); + global_context->getExternalDictionariesLoader().addConfigRepository("", std::move(config_repository)); } catch (...) 
{ diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index d04321f8357..bb87b18f810 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -298,14 +299,15 @@ void DatabaseOnDisk::createDictionary( { /// Do not load it now database.attachDictionary(dictionary_name, context, false); + /// Load dictionary + bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); + String dict_name = database.getDatabaseName() + "." + dictionary_name; + context.getExternalDictionariesLoader().reloadSingleDictionary(dict_name, database.getDatabaseName(), query->as(), !lazy_load, !lazy_load); /// If it was ATTACH query and file with table metadata already exist /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. Poco::File(dictionary_metadata_tmp_path).renameTo(dictionary_metadata_path); - /// Load dictionary - bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); - context.getExternalDictionariesLoader().reload(database.getDatabaseName() + "." + dictionary_name, !lazy_load); } catch (...) 
{ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 2b3cb267557..57971e446c9 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1321,15 +1321,13 @@ const ExternalDictionariesLoader & Context::getExternalDictionariesLoader() cons return *shared->external_dictionaries_loader; } - const auto & config = getConfigRef(); std::lock_guard lock(shared->external_dictionaries_mutex); if (!shared->external_dictionaries_loader) { if (!this->global_context) throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); - auto config_repository = std::make_unique(config, "dictionaries_config"); - shared->external_dictionaries_loader.emplace(std::move(config_repository), *this->global_context); + shared->external_dictionaries_loader.emplace(*this->global_context); } return *shared->external_dictionaries_loader; } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 24622e43b18..422b41eb336 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -1,17 +1,16 @@ #include #include #include +#include namespace DB { /// Must not acquire Context lock in constructor to avoid possibility of deadlocks. 
-ExternalDictionariesLoader::ExternalDictionariesLoader( - ExternalLoaderConfigRepositoryPtr config_repository, Context & context_) +ExternalDictionariesLoader::ExternalDictionariesLoader(Context & context_) : ExternalLoader("external dictionary", &Logger::get("ExternalDictionariesLoader")) , context(context_) { - addConfigRepository("", std::move(config_repository)); enableAsyncLoading(true); enablePeriodicUpdates(true); } @@ -28,4 +27,19 @@ void ExternalDictionariesLoader::addConfigRepository( { ExternalLoader::addConfigRepository(repository_name, std::move(config_repository), {"dictionary", "name"}); } + + +void ExternalDictionariesLoader::reloadSingleDictionary( + const String & name, + const String & repo_name, + const ASTCreateQuery & query, + bool load_never_loading, bool sync) const +{ + return ExternalLoader::reloadWithConfig( + name, /// names are equal + name, + repo_name, + getDictionaryConfigurationFromAST(query), + "dictionary", load_never_loading, sync); +} } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h index 413eac72931..f596f602dea 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.h +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -19,9 +20,7 @@ public: using DictPtr = std::shared_ptr; /// Dictionaries will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds. - ExternalDictionariesLoader( - ExternalLoaderConfigRepositoryPtr config_repository, - Context & context_); + ExternalDictionariesLoader(Context & context_); DictPtr getDictionary(const std::string & name) const { @@ -38,6 +37,15 @@ public: std::unique_ptr config_repository); + /// Starts reloading of a specified object. 
+ void reloadSingleDictionary( + const String & name, + const String & repo_name, + const ASTCreateQuery & query, + bool load_never_loading = false, + bool sync = false) const; + + protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config) const override; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 9acd42d15f4..f06d891a928 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -41,7 +41,10 @@ public: } ~LoadablesConfigReader() = default; - void addConfigRepository(const String & name, std::unique_ptr repository, const ExternalLoaderConfigSettings & settings) + void addConfigRepository( + const String & name, + std::unique_ptr repository, + const ExternalLoaderConfigSettings & settings) { std::lock_guard lock{mutex}; repositories.emplace(name, std::make_pair(std::move(repository), settings)); @@ -53,18 +56,66 @@ public: repositories.erase(name); } - using ObjectConfigsPtr = std::shared_ptr>; + /// Reads configurations. ObjectConfigsPtr read() { - std::lock_guard lock{mutex}; - + std::lock_guard lock(mutex); // Check last modification times of files and read those files which are new or changed. if (!readLoadablesInfos()) return configs; // Nothing changed, so we can return the previous result. 
+ return collectConfigs(); + } + + ObjectConfigsPtr updateLoadableInfo( + const String & external_name, + const String & object_name, + const String & repo_name, + const Poco::AutoPtr & config, + const String & key) + { + std::lock_guard lock(mutex); + + auto it = loadables_infos.find(object_name); + if (it == loadables_infos.end()) + { + LoadablesInfos loadable_info; + loadables_infos[object_name] = loadable_info; + } + auto & loadable_info = loadables_infos[object_name]; + ObjectConfig object_config{object_name, config, key, repo_name}; + bool found = false; + for (auto iter = loadable_info.configs.begin(); iter != loadable_info.configs.end(); ++iter) + { + if (iter->first == external_name) + { + iter->second = object_config; + found = true; + break; + } + } + + if (!found) + loadable_info.configs.emplace_back(external_name, object_config); + loadable_info.last_update_time = Poco::Timestamp{}; /// now + loadable_info.in_use = true; + return collectConfigs(); + } + +private: + struct LoadablesInfos + { + Poco::Timestamp last_update_time = 0; + std::vector> configs; // Parsed loadable's contents. + bool in_use = true; // Whether the `LoadablesInfos` should be destroyed because the correspondent loadable is deleted. + }; + + /// Collect current configurations + ObjectConfigsPtr collectConfigs() + { // Generate new result. auto new_configs = std::make_shared>(); for (const auto & [path, loadable_info] : loadables_infos) @@ -89,14 +140,6 @@ public: return configs; } -private: - struct LoadablesInfos - { - Poco::Timestamp last_update_time = 0; - std::vector> configs; // Parsed file's contents. - bool in_use = true; // Whether the ` LoadablesInfos` should be destroyed because the correspondent file is deleted. - }; - /// Read files and store them to the map ` loadables_infos`. 
bool readLoadablesInfos() { @@ -208,6 +251,7 @@ private: } } + const String type_name; Logger * log; @@ -337,7 +381,6 @@ public: /// Sets whether the objects should be loaded asynchronously, each loading in a new thread (from the thread pool). void enableAsyncLoading(bool enable) { - std::lock_guard lock{mutex}; enable_async_loading = enable; } @@ -456,18 +499,20 @@ public: void load(LoadResults & loaded_results, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_results, timeout); } /// Starts reloading a specified object. - void reload(const String & name, bool load_never_loading = false) + void reload(const String & name, bool load_never_loading = false, bool sync = false) { std::lock_guard lock{mutex}; Info * info = getInfo(name); if (!info) + { return; + } if (info->wasLoading() || load_never_loading) { cancelLoading(*info); info->forced_to_reload = true; - startLoading(name, *info); + startLoading(name, *info, sync); } } @@ -690,7 +735,7 @@ private: event.wait_for(lock, timeout, pred); } - void startLoading(const String & name, Info & info) + void startLoading(const String & name, Info & info, bool sync = false) { if (info.loading()) return; @@ -701,7 +746,7 @@ private: info.loading_start_time = std::chrono::system_clock::now(); info.loading_end_time = TimePoint{}; - if (enable_async_loading) + if (enable_async_loading && !sync) { /// Put a job to the thread pool for the loading. auto thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, name, loading_id, true}; @@ -710,6 +755,7 @@ private: else { /// Perform the loading immediately. + /// Deadlock when we try to load dictionary from dictionary on localhost doLoading(name, loading_id, false); } } @@ -773,6 +819,8 @@ private: /// Lock the mutex again to store the changes. if (async) lock.lock(); + else if (new_exception) + std::rethrow_exception(new_exception); /// Calculate a new update time. 
TimePoint next_update_time; @@ -895,7 +943,7 @@ private: ObjectConfigsPtr configs; std::unordered_map infos; bool always_load_everything = false; - bool enable_async_loading = false; + std::atomic enable_async_loading = false; std::unordered_map loading_ids; size_t next_loading_id = 1; /// should always be > 0 mutable pcg64 rnd_engine{randomSeed()}; @@ -992,7 +1040,6 @@ void ExternalLoader::addConfigRepository( void ExternalLoader::removeConfigRepository(const std::string & repository_name) { config_files_reader->removeConfigRepository(repository_name); - loading_dispatcher->setConfiguration(config_files_reader->read()); } void ExternalLoader::enableAlwaysLoadEverything(bool enable) @@ -1083,10 +1130,11 @@ void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const return loading_dispatcher->load(loaded_objects, timeout); } -void ExternalLoader::reload(const String & name, bool load_never_loading) const +void ExternalLoader::reload(const String & name, bool load_never_loading, bool sync) const { - loading_dispatcher->setConfiguration(config_files_reader->read()); - loading_dispatcher->reload(name, load_never_loading); + auto configs = config_files_reader->read(); + loading_dispatcher->setConfiguration(configs); + loading_dispatcher->reload(name, load_never_loading, sync); } void ExternalLoader::reload(bool load_never_loading) const @@ -1095,6 +1143,21 @@ void ExternalLoader::reload(bool load_never_loading) const loading_dispatcher->reload(load_never_loading); } +void ExternalLoader::reloadWithConfig( + const String & name, + const String & external_name, + const String & repo_name, + const Poco::AutoPtr & config, + const String & key, + bool load_never_loading, + bool sync) const +{ + loading_dispatcher->setConfiguration( + config_files_reader->updateLoadableInfo(external_name, name, repo_name, config, key)); + loading_dispatcher->reload(name, load_never_loading, sync); +} + + ExternalLoader::LoadablePtr ExternalLoader::createObject( const String 
& name, const ObjectConfig & config, bool config_changed, const LoadablePtr & previous_version) const { diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 71b18d9edd6..5a41072f04a 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -147,7 +147,9 @@ public: /// Starts reloading of a specified object. /// `load_never_loading` specifies what to do if the object has never been loading before. /// The function can either skip it (false) or load for the first time (true). - void reload(const String & name, bool load_never_loading = false) const; + /// Also function can load dictionary synchronously + void reload(const String & name, bool load_never_loading = false, bool sync = false) const; + /// Starts reloading of all the objects. /// `load_never_loading` specifies what to do with the objects which have never been loading before. @@ -157,6 +159,16 @@ public: protected: virtual LoadablePtr create(const String & name, const Poco::Util::AbstractConfiguration & config, const String & key_in_config) const = 0; + /// Reload object with already parsed configuration + void reloadWithConfig( + const String & name, /// name of dictionary + const String & external_name, /// name of source (example xml-file, may contain more than dictionary) + const String & repo_name, /// name of repository (database name, or all xml files) + const Poco::AutoPtr & config, + const String & key_in_config, /// key where we can start search of loadables (, , etc) + bool load_never_loading = false, + bool sync = false) const; + private: struct ObjectConfig; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index d21328f8e31..5ddf6843b85 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -773,6 +773,14 @@ void InterpreterCreateQuery::checkAccess(const ASTCreateQuery & create) 
throw Exception("Cannot create database. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); } + String object = "table"; + + if (create.is_dictionary) + { + if (readonly) + throw Exception("Cannot create dictionary in readonly mode", ErrorCodes::READONLY); + object = "dictionary"; + } if (create.temporary && readonly >= 2) return; @@ -780,6 +788,7 @@ void InterpreterCreateQuery::checkAccess(const ASTCreateQuery & create) if (readonly) throw Exception("Cannot create table or dictionary in readonly mode", ErrorCodes::READONLY); - throw Exception("Cannot create table. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); + throw Exception("Cannot create " + object + ". DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); } + } diff --git a/dbms/tests/integration/test_dictionaries_ddl/__init__.py b/dbms/tests/integration/test_dictionaries_ddl/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_dictionaries_ddl/configs/config.xml b/dbms/tests/integration/test_dictionaries_ddl/configs/config.xml new file mode 100644 index 00000000000..e24857fa806 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/configs/config.xml @@ -0,0 +1,19 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + + 9000 + 127.0.0.1 + 500 + 5368709120 + ./clickhouse/ + users.xml + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/lazy_load.xml b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/lazy_load.xml new file mode 100644 index 00000000000..d01f7a0155b --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/lazy_load.xml @@ -0,0 +1,4 @@ + + false + + diff --git 
a/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/simple_dictionary.xml b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/simple_dictionary.xml new file mode 100644 index 00000000000..5032ca4266c --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/simple_dictionary.xml @@ -0,0 +1,41 @@ + + + xml_dictionary + + + localhost + 9000 + default + + test + xml_dictionary_table
+
+ + + + 0 + 0 + + + + 128 + + + + + id + + + SomeValue1 + UInt8 + 1 + + + + SomeValue2 + String + '' + + +
+
diff --git a/dbms/tests/integration/test_dictionaries_ddl/configs/users.xml b/dbms/tests/integration/test_dictionaries_ddl/configs/users.xml new file mode 100644 index 00000000000..3e53e05aee1 --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/configs/users.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + ::/0 + + default + default + + default + test + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_dictionaries_ddl/test.py b/dbms/tests/integration/test_dictionaries_ddl/test.py new file mode 100644 index 00000000000..6687bed215c --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/test.py @@ -0,0 +1,163 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +from helpers.client import QueryRuntimeException +import pymysql +import warnings +import time + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) +node1 = cluster.add_instance('node1', with_mysql=True, main_configs=['configs/dictionaries/simple_dictionary.xml']) +node2 = cluster.add_instance('node2', with_mysql=True, main_configs=['configs/dictionaries/simple_dictionary.xml', 'configs/dictionaries/lazy_load.xml']) + + +def create_mysql_conn(user, password, hostname, port): + return pymysql.connect( + user=user, + password=password, + host=hostname, + port=port) + +def execute_mysql_query(connection, query): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with connection.cursor() as cursor: + cursor.execute(query) + connection.commit() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for clickhouse in [node1, node2]: + clickhouse.query("CREATE DATABASE test", user="admin") + clickhouse.query("CREATE TABLE test.xml_dictionary_table (id UInt64, SomeValue1 UInt8, SomeValue2 String) ENGINE = MergeTree() 
ORDER BY id", user="admin") + clickhouse.query("INSERT INTO test.xml_dictionary_table SELECT number, number % 23, hex(number) from numbers(1000)", user="admin") + yield cluster + + finally: + cluster.shutdown() + + + +@pytest.mark.parametrize("clickhouse,name,layout", [ + (node1, 'complex_node1_hashed', 'LAYOUT(COMPLEX_KEY_HASHED())'), + (node1, 'complex_node1_cache', 'LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10))'), + (node2, 'complex_node2_hashed', 'LAYOUT(COMPLEX_KEY_HASHED())'), + (node2, 'complex_node2_cache', 'LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10))'), +]) +def test_crete_and_select_mysql(started_cluster, clickhouse, name, layout): + mysql_conn = create_mysql_conn("root", "clickhouse", "localhost", 3308) + execute_mysql_query(mysql_conn, "CREATE DATABASE IF NOT EXISTS clickhouse") + execute_mysql_query(mysql_conn, "CREATE TABLE clickhouse.{} (key_field1 int, key_field2 bigint, value1 text, value2 float, PRIMARY KEY (key_field1, key_field2))".format(name)) + values = [] + for i in range(1000): + values.append('(' + ','.join([str(i), str(i * i), str(i) * 5, str(i * 3.14)]) + ')') + execute_mysql_query(mysql_conn, "INSERT INTO clickhouse.{} VALUES ".format(name) + ','.join(values)) + + clickhouse.query(""" + CREATE DICTIONARY default.{} ( + key_field1 Int32, + key_field2 Int64, + value1 String DEFAULT 'xxx', + value2 Float32 DEFAULT 'yyy' + ) + PRIMARY KEY key_field1, key_field2 + SOURCE(MYSQL( + USER 'root' + PASSWORD 'clickhouse' + DB 'clickhouse' + TABLE '{}' + REPLICA(PRIORITY 1 HOST '127.0.0.1' PORT 3333) + REPLICA(PRIORITY 2 HOST 'mysql1' PORT 3306) + )) + {} + LIFETIME(MIN 1 MAX 3) + """.format(name, name, layout)) + + for i in range(172, 200): + assert clickhouse.query("SELECT dictGetString('default.{}', 'value1', tuple(toInt32({}), toInt64({})))".format(name, i, i * i)) == str(i) * 5 + '\n' + stroka = clickhouse.query("SELECT dictGetFloat32('default.{}', 'value2', tuple(toInt32({}), toInt64({})))".format(name, i, i * i)).strip() + value = 
float(stroka) + assert int(value) == int(i * 3.14) + + + for i in range(1000): + values.append('(' + ','.join([str(i), str(i * i), str(i) * 3, str(i * 2.718)]) + ')') + execute_mysql_query(mysql_conn, "REPLACE INTO clickhouse.{} VALUES ".format(name) + ','.join(values)) + + clickhouse.query("SYSTEM RELOAD DICTIONARY 'default.{}'".format(name)) + + for i in range(172, 200): + assert clickhouse.query("SELECT dictGetString('default.{}', 'value1', tuple(toInt32({}), toInt64({})))".format(name, i, i * i)) == str(i) * 3 + '\n' + stroka = clickhouse.query("SELECT dictGetFloat32('default.{}', 'value2', tuple(toInt32({}), toInt64({})))".format(name, i, i * i)).strip() + value = float(stroka) + assert int(value) == int(i * 2.718) + + clickhouse.query("select dictGetUInt8('xml_dictionary', 'SomeValue1', toUInt64(17))") == "17\n" + clickhouse.query("select dictGetString('xml_dictionary', 'SomeValue2', toUInt64(977))") == str(hex(977))[2:] + '\n' + + +def test_restricted_database(started_cluster): + for node in [node1, node2]: + node.query("CREATE DATABASE IF NOT EXISTS restricted_db", user="admin") + node.query("CREATE TABLE restricted_db.table_in_restricted_db AS test.xml_dictionary_table", user="admin") + + with pytest.raises(QueryRuntimeException): + node1.query(""" + CREATE DICTIONARY restricted_db.some_dict( + id UInt64, + SomeValue1 UInt8, + SomeValue2 String + ) + PRIMARY KEY id + LAYOUT(FLAT()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_in_restricted_db' DB 'restricted_db')) + LIFETIME(MIN 1 MAX 10) + """) + + with pytest.raises(QueryRuntimeException): + node1.query(""" + CREATE DICTIONARY default.some_dict( + id UInt64, + SomeValue1 UInt8, + SomeValue2 String + ) + PRIMARY KEY id + LAYOUT(FLAT()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_in_restricted_db' DB 'restricted_db')) + LIFETIME(MIN 1 MAX 10) + """) + + node1.query("SELECT dictGetUInt8('default.some_dict', 'SomeValue1', toUInt64(17))") == "17\n" 
+ + # with lazy load we don't need query to get exception + with pytest.raises(QueryRuntimeException): + node2.query(""" + CREATE DICTIONARY restricted_db.some_dict( + id UInt64, + SomeValue1 UInt8, + SomeValue2 String + ) + PRIMARY KEY id + LAYOUT(FLAT()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_in_restricted_db' DB 'restricted_db')) + LIFETIME(MIN 1 MAX 10) + """) + + with pytest.raises(QueryRuntimeException): + node2.query(""" + CREATE DICTIONARY default.some_dict( + id UInt64, + SomeValue1 UInt8, + SomeValue2 String + ) + PRIMARY KEY id + LAYOUT(FLAT()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_in_restricted_db' DB 'restricted_db')) + LIFETIME(MIN 1 MAX 10) + """) From a014924c19d0ee07e8cbfa441348cdf610c78f7d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 18 Oct 2019 18:51:32 +0300 Subject: [PATCH 035/122] Remove wrong tests --- .../01018_ddl_dictionaries_create.reference | 1 - .../01018_ddl_dictionaries_create.sql | 19 ------------------- 2 files changed, 20 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index a8ee6b67912..327c02a4b8a 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -13,7 +13,6 @@ ordinary_db dict1 0 =DICTIONARY in Memory DB 0 -=DICTIONARY in Dictionary DB =DICTIONARY in Lazy DB =DROP DATABASE WITH DICTIONARY dict4 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index ce8e5c2cc2c..9e7cf24d9c5 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -99,25 +99,6 @@ EXISTS DICTIONARY memory_db.dict2; SELECT database, name FROM system.dictionaries WHERE 
name LIKE 'dict2'; -DROP DATABASE IF EXISTS dictionary_db; - -CREATE DATABASE dictionary_db ENGINE = Dictionary; - -SELECT '=DICTIONARY in Dictionary DB'; - -CREATE DICTIONARY dictionary_db.dict2 -( - key_column UInt64 DEFAULT 0 INJECTIVE, - second_column UInt8 DEFAULT 1 EXPRESSION rand() % 222, - third_column String DEFAULT 'qqq' -) -PRIMARY KEY key_column, second_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) -LIFETIME(MIN 1 MAX 10) -LAYOUT(COMPLEX_KEY_HASHED()); -- {serverError 1} - -DROP DATABASE IF EXISTS dictionary_db; - SELECT '=DICTIONARY in Lazy DB'; DROP DATABASE IF EXISTS lazy_db; From 25336528470b4e19b2300399196f5f07dee2611d Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 18 Oct 2019 19:16:57 +0300 Subject: [PATCH 036/122] columns match priority: table alias > table name> aliased table name --- .../Interpreters/CollectJoinOnKeysVisitor.cpp | 3 +- dbms/src/Interpreters/IdentifierSemantic.cpp | 80 +++++++------------ dbms/src/Interpreters/IdentifierSemantic.h | 19 ++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 7 +- .../TranslateQualifiedNamesVisitor.cpp | 46 ++++++----- .../00849_multiple_comma_join.reference | 12 +-- .../01018_anbiguous_column.reference | 6 ++ .../0_stateless/01018_anbiguous_column.sql | 21 +++++ 8 files changed, 105 insertions(+), 89 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01018_anbiguous_column.reference create mode 100644 dbms/tests/queries/0_stateless/01018_anbiguous_column.sql diff --git a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp index f8938f2a7d3..894e1ea3a5a 100644 --- a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -164,7 +164,8 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector IdentifierSemantic::getTableName(const ASTPtr & ast) return {}; } -void 
IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value) -{ - identifier.semantic->need_long_name = value; -} - bool IdentifierSemantic::canBeAlias(const ASTIdentifier & identifier) { return identifier.semantic->can_be_alias; } -void IdentifierSemantic::setMembership(ASTIdentifier & identifier, size_t table_no) +void IdentifierSemantic::setMembership(ASTIdentifier & identifier, size_t table_pos) { - identifier.semantic->membership = table_no; + identifier.semantic->membership = table_pos; + identifier.semantic->can_be_alias = false; } -size_t IdentifierSemantic::getMembership(const ASTIdentifier & identifier) +std::optional IdentifierSemantic::getMembership(const ASTIdentifier & identifier) { return identifier.semantic->membership; } @@ -156,7 +152,7 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const { /// database.table.column if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table)) - return ColumnMatch::DatabaseAndTable; + return ColumnMatch::DbAndTable; /// alias.column if (doesIdentifierBelongTo(identifier, db_and_table.alias)) @@ -164,15 +160,36 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const /// table.column if (doesIdentifierBelongTo(identifier, db_and_table.table)) - return ColumnMatch::TableName; + { + if (!db_and_table.alias.empty()) + return ColumnMatch::AliasedTableName; + else + return ColumnMatch::TableName; + } return ColumnMatch::NoMatch; } -/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. -/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. -void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t to_strip) +/// Strip qualificators from left side of column name. +/// Example: 'database.table.name' -> 'name'. 
+void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) { + auto match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); + size_t to_strip = 0; + switch (match) + { + case ColumnMatch::TableName: + case ColumnMatch::AliasedTableName: + case ColumnMatch::TableAlias: + to_strip = 1; + break; + case ColumnMatch::DbAndTable: + to_strip = 2; + break; + default: + break; + } + if (!to_strip) return; @@ -188,31 +205,6 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t identifier.name.swap(new_name); } -void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) -{ - auto match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); - size_t to_strip = 0; - switch (match) - { - case ColumnMatch::TableName: - case ColumnMatch::TableAlias: - to_strip = 1; - break; - case ColumnMatch::DatabaseAndTable: - to_strip = 2; - break; - default: - break; - } - - setColumnShortName(identifier, to_strip); - if (value(match)) - identifier.semantic->can_be_alias = false; - - if (identifier.semantic->need_long_name) - setColumnLongName(identifier, db_and_table); -} - void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) { String prefix = db_and_table.getQualifiedNamePrefix(); @@ -225,16 +217,4 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat } } -String IdentifierSemantic::columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) -{ - ASTPtr copy = identifier.clone(); - setColumnNormalName(copy->as(), db_and_table); - return copy->getAliasOrColumnName(); -} - -String IdentifierSemantic::columnLongName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) -{ - return db_and_table.getQualifiedNamePrefix() + identifier.shortName(); -} 
- } diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 8a48227d6fe..832a3345b5a 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -9,9 +11,8 @@ namespace DB struct IdentifierSemanticImpl { bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT - bool need_long_name = false;/// if column presents in multiple tables we need qualified names bool can_be_alias = true; /// if it's a cropped name it could not be an alias - size_t membership = 0; /// table position in join (starting from 1) detected by qualifier or 0 if not detected. + std::optional membership; /// table position in join }; /// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier @@ -20,9 +21,9 @@ struct IdentifierSemantic enum class ColumnMatch { NoMatch, - ColumnName, /// table has column with same name + AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName) TableName, /// column qualified with table name - DatabaseAndTable, /// column qualified with database and table name + DbAndTable, /// column qualified with database and table name TableAlias, /// column qualified with table alias Ambiguous, }; @@ -37,14 +38,11 @@ struct IdentifierSemantic static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static String columnLongName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static void 
setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name static bool canBeAlias(const ASTIdentifier & identifier); - static void setMembership(ASTIdentifier & identifier, size_t table_no); - static size_t getMembership(const ASTIdentifier & identifier); + static void setMembership(ASTIdentifier &, size_t table_no); + static std::optional getMembership(const ASTIdentifier & identifier); static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, bool ambiguous = false); static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, @@ -53,7 +51,6 @@ struct IdentifierSemantic private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); - static void setColumnShortName(ASTIdentifier & identifier, size_t match); }; inline UInt32 value(IdentifierSemantic::ColumnMatch match) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 3d43b7f0b25..c7cd789f882 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -565,11 +565,16 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery & s } } -void replaceJoinedTable(const ASTTablesInSelectQueryElement* join) +void replaceJoinedTable(const ASTTablesInSelectQueryElement * join) { if (!join || !join->table_expression) return; + /// TODO: Push down for CROSS JOIN is not OK [disabled] + const auto & table_join = join->table_join->as(); + if (table_join.kind == ASTTableJoin::Kind::Cross) + return; + auto & table_expr = 
join->table_expression->as(); if (table_expr.database_and_table_name) { diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 204fe454052..549b1c0c1e6 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -61,16 +61,20 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, { if (IdentifierSemantic::getColumnName(identifier)) { - size_t best_table_pos = 0; - bool allow_ambiguous = data.join_using_columns.count(identifier.shortName()); - if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos, allow_ambiguous)) - IdentifierSemantic::setMembership(identifier, best_table_pos + 1); + String short_name = identifier.shortName(); + size_t table_pos = 0; + bool allow_ambiguous = data.join_using_columns.count(short_name); + if (IdentifierSemantic::chooseTable(identifier, data.tables, table_pos, allow_ambiguous)) + { + IdentifierSemantic::setMembership(identifier, table_pos); - /// In case if column from the joined table are in source columns, change it's name to qualified. - if (best_table_pos && data.hasColumn(identifier.shortName())) - IdentifierSemantic::setNeedLongName(identifier, true); - if (data.hasTable()) - IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); + /// In case if column from the joined table are in source columns, change it's name to qualified. 
+ auto & table = data.tables[table_pos].first; + if (table_pos && data.hasColumn(short_name)) + IdentifierSemantic::setColumnLongName(identifier, table); + else + IdentifierSemantic::setColumnShortName(identifier, table); + } } } @@ -126,8 +130,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr Visitor(data).visit(select.refHaving()); } -static void addIdentifier(ASTs & nodes, const String & table_name, const String & column_name, AsteriskSemantic::RevertedAliasesPtr aliases) +static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name, + AsteriskSemantic::RevertedAliasesPtr aliases) { + String table_name = table.getQualifiedNamePrefix(false); auto identifier = std::make_shared(std::vector{table_name, column_name}); bool added = false; @@ -189,8 +195,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if (first_table || !data.join_using_columns.count(column_name)) { - String table_name = table.getQualifiedNamePrefix(false); - addIdentifier(node.children, table_name, column_name, AsteriskSemantic::getAliases(*asterisk)); + addIdentifier(node.children, table, column_name, AsteriskSemantic::getAliases(*asterisk)); } } @@ -206,8 +211,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if (asterisk_pattern->isColumnMatching(column_name) && (first_table || !data.join_using_columns.count(column_name))) { - String table_name = table.getQualifiedNamePrefix(false); - addIdentifier(node.children, table_name, column_name, AsteriskSemantic::getAliases(*asterisk_pattern)); + addIdentifier(node.children, table, column_name, AsteriskSemantic::getAliases(*asterisk_pattern)); } } @@ -224,8 +228,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { for (const auto & column_name : table_columns) { - String table_name = table.getQualifiedNamePrefix(false); - addIdentifier(node.children, table_name, 
column_name, AsteriskSemantic::getAliases(*qualified_asterisk)); + addIdentifier(node.children, table, column_name, AsteriskSemantic::getAliases(*qualified_asterisk)); } break; } @@ -261,11 +264,14 @@ void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(const ASTPtr ast, D void RestoreQualifiedNamesData::visit(ASTIdentifier & identifier, ASTPtr & ast) { - if (IdentifierSemantic::getColumnName(identifier) && - IdentifierSemantic::getMembership(identifier)) + if (IdentifierSemantic::getColumnName(identifier)) { - ast = identifier.clone(); - ast->as()->restoreCompoundName(); + auto opt_match = IdentifierSemantic::getMembership(identifier); + if (opt_match && *opt_match) + { + ast = identifier.clone(); + ast->as()->restoreCompoundName(); + } } } diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index e1256053739..65ec7f67718 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,4 +1,4 @@ -SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 +SELECT a\nFROM t1_00849\nCROSS JOIN t2_00849 SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) @@ -6,12 +6,12 @@ SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM 
\n(\n SELECT \n a AS `--t1 SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE 
`--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n a AS `--t2_00849.a`, 
\n b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM 
t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nCROSS JOIN t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nCROSS JOIN t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference b/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference new 
file mode 100644 index 00000000000..90d1da5b891 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference @@ -0,0 +1,6 @@ +0 0 +0 0 +0 +0 +0 +0 diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql new file mode 100644 index 00000000000..2873d3e58a0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql @@ -0,0 +1,21 @@ +select * from system.one cross join system.one; -- { serverError 352 } +select * from system.one cross join system.one r; +select * from system.one l cross join system.one; +select * from system.one left join system.one using dummy; +select dummy from system.one left join system.one using dummy; + +USE system; + +SELECT dummy FROM one AS A JOIN one ON A.dummy = one.dummy; +SELECT dummy FROM one JOIN one AS A ON A.dummy = one.dummy; +-- SELECT dummy FROM one l JOIN one r ON l.dummy = r.dummy; -- should be an error + +-- SELECT * from system.one +-- JOIN system.one one ON one.dummy = system.one.dummy +-- JOIN system.one two ON one.dummy = two.dummy +-- FORMAT PrettyCompact; + +-- SELECT * from system.one one +-- JOIN system.one ON one.dummy = system.one.dummy +-- JOIN system.one two ON one.dummy = two.dummy +-- FORMAT PrettyCompact; From 1bb81742852b3c5f6fa7fb02f4e8481d7152e90d Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 18 Oct 2019 19:34:06 +0300 Subject: [PATCH 037/122] disable push down for CROSS JOIN tests --- .../00826_cross_to_inner_join.reference | 28 +++++++++---------- .../0_stateless/00826_cross_to_inner_join.sql | 1 + .../0_stateless/00849_multiple_comma_join.sql | 1 + 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 24649ea3acb..df21becc999 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ 
b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -56,26 +56,26 @@ comma nullable 1 1 1 1 2 2 1 2 cross -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable vs not nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b cross self SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nCROSS JOIN t1_00826 AS y\nWHERE (a = y.a) AND (b = y.b) SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y 
ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) cross one table expr -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b cross multiple ands -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) cross and inside and -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n 
t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql index 035662a0a0d..e21d257d2da 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -1,4 +1,5 @@ SET enable_debug_queries = 1; +SET enable_optimize_predicate_expression = 0; set allow_experimental_cross_to_join_conversion = 0; select * from system.one l cross join system.one r; diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.sql b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.sql index d1d247a0174..f80daecbe87 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.sql +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.sql @@ -1,4 +1,5 @@ SET enable_debug_queries = 1; +SET 
enable_optimize_predicate_expression = 0; SET joined_subquery_requires_alias = 0; DROP TABLE IF EXISTS t1_00849; From 37f07213eec2d18f386634f49c72aecb94fd212c Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 18 Oct 2019 19:45:59 +0300 Subject: [PATCH 038/122] better test --- .../0_stateless/01018_anbiguous_column.sql | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql index 2873d3e58a0..496a616da42 100644 --- a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql +++ b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql @@ -8,14 +8,17 @@ USE system; SELECT dummy FROM one AS A JOIN one ON A.dummy = one.dummy; SELECT dummy FROM one JOIN one AS A ON A.dummy = one.dummy; --- SELECT dummy FROM one l JOIN one r ON l.dummy = r.dummy; -- should be an error +SELECT dummy FROM one l JOIN one r ON dummy = r.dummy; -- { serverError 352 } +SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 352 } +SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy; -- { serverError 352 } +SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 352 } --- SELECT * from system.one --- JOIN system.one one ON one.dummy = system.one.dummy --- JOIN system.one two ON one.dummy = two.dummy +-- SELECT * from one +-- JOIN one A ON one.dummy = A.dummy +-- JOIN one B ON one.dummy = B.dummy -- FORMAT PrettyCompact; - --- SELECT * from system.one one --- JOIN system.one ON one.dummy = system.one.dummy --- JOIN system.one two ON one.dummy = two.dummy +-- +-- SELECT * from one +-- JOIN one A ON dummy = A.dummy +-- JOIN one B ON dummy = B.dummy -- FORMAT PrettyCompact; From c5a850240c6eaf948ba719f335f0e3f7f8078450 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 18 Oct 2019 20:09:17 +0300 Subject: [PATCH 039/122] better ambiguous column detection in multiple join rewriter --- 
.../JoinToSubqueryTransformVisitor.cpp | 18 ++++++++---------- .../01018_anbiguous_column.reference | 6 ++++++ .../0_stateless/01018_anbiguous_column.sql | 18 +++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index b7bf7c9b983..a2536521a7d 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -224,17 +224,15 @@ struct ColumnAliasesMatcher bool last_table = false; String long_name; - for (auto & table : data.tables) + + size_t table_pos = 0; + if (IdentifierSemantic::chooseTable(node, data.tables, table_pos)) { - if (value(IdentifierSemantic::canReferColumnToTable(node, table))) - { - if (!long_name.empty()) - throw Exception("Cannot refer column '" + node.name + "' to one table", ErrorCodes::AMBIGUOUS_COLUMN_NAME); - IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name - long_name = node.name; - if (&table == &data.tables.back()) - last_table = true; - } + auto & table = data.tables[table_pos]; + IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name + long_name = node.name; + if (&table == &data.tables.back()) + last_table = true; } if (long_name.empty()) diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference b/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference index 90d1da5b891..90b24009d0f 100644 --- a/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference +++ b/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference @@ -4,3 +4,9 @@ 0 0 0 +┌─one.dummy─┬─A.dummy─┬─B.dummy─┐ +│ 0 │ 0 │ 0 │ +└───────────┴─────────┴─────────┘ +┌─A.dummy─┬─one.dummy─┬─two.dummy─┐ +│ 0 │ 0 │ 0 │ +└─────────┴───────────┴───────────┘ diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql 
b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql index 496a616da42..ab291178f87 100644 --- a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql +++ b/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql @@ -13,12 +13,12 @@ SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 352 } SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy; -- { serverError 352 } SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 352 } --- SELECT * from one --- JOIN one A ON one.dummy = A.dummy --- JOIN one B ON one.dummy = B.dummy --- FORMAT PrettyCompact; --- --- SELECT * from one --- JOIN one A ON dummy = A.dummy --- JOIN one B ON dummy = B.dummy --- FORMAT PrettyCompact; +SELECT * from one +JOIN one A ON one.dummy = A.dummy +JOIN one B ON one.dummy = B.dummy +FORMAT PrettyCompact; + +SELECT * from one A +JOIN system.one one ON A.dummy = one.dummy +JOIN system.one two ON A.dummy = two.dummy +FORMAT PrettyCompact; From 5e299a46bbb1c143965c98641553f4a412cd8e4a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 15 Oct 2019 00:25:18 +0300 Subject: [PATCH 040/122] docs: enumerate uniqCombined64 for count_distinct_implementation --- docs/en/operations/settings/settings.md | 1 + docs/ru/operations/settings/settings.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ef1b664272e..53b50931ec2 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -898,6 +898,7 @@ Possible values: - [uniq](../../query_language/agg_functions/reference.md#agg_function-uniq) - [uniqCombined](../../query_language/agg_functions/reference.md#agg_function-uniqcombined) +- [uniqCombined64](../../query_language/agg_functions/reference.md#agg_function-uniqcombined64) - [uniqHLL12](../../query_language/agg_functions/reference.md#agg_function-uniqhll12) - 
[uniqExact](../../query_language/agg_functions/reference.md#agg_function-uniqexact) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 90803a7d27e..ba4f07cda3b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -777,6 +777,7 @@ load_balancing = first_or_random - [uniq](../../query_language/agg_functions/reference.md#agg_function-uniq) - [uniqCombined](../../query_language/agg_functions/reference.md#agg_function-uniqcombined) +- [uniqCombined64](../../query_language/agg_functions/reference.md#agg_function-uniqcombined64) - [uniqHLL12](../../query_language/agg_functions/reference.md#agg_function-uniqhll12) - [uniqExact](../../query_language/agg_functions/reference.md#agg_function-uniqexact) From 260a9fba2b72512ae91a8784f07cf5f0b4fec2c5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Oct 2019 14:26:39 +0300 Subject: [PATCH 041/122] StorageDistributed: Drop extra inclusion of materializeBlock.h --- dbms/src/Storages/StorageDistributed.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 2c289dd714e..8361daefd87 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -1,7 +1,6 @@ #include #include -#include #include From 797f76a187e8fbc469ddcea0154490af72c14956 Mon Sep 17 00:00:00 2001 From: VDimir Date: Sat, 19 Oct 2019 07:26:19 +0000 Subject: [PATCH 042/122] Separate class for IsValidJSONImpl --- dbms/src/Functions/FunctionsJSON.cpp | 2 +- dbms/src/Functions/FunctionsJSON.h | 44 +++++++++++++++------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/dbms/src/Functions/FunctionsJSON.cpp b/dbms/src/Functions/FunctionsJSON.cpp index eedbecf852d..673a04a8353 100644 --- a/dbms/src/Functions/FunctionsJSON.cpp +++ b/dbms/src/Functions/FunctionsJSON.cpp @@ -8,7 +8,7 @@ namespace DB void 
registerFunctionsJSON(FunctionFactory & factory) { factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index 1f93295c3aa..ca9f9d59136 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -293,25 +293,11 @@ struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKey struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -template -class JSONCheckImpl +template +class JSONHasImpl { public: - static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if constexpr (!support_key_lookup) - { - if (arguments.size() != 1) - throw Exception{"Function " + String(function_name) + " needs exactly one argument", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - } - else - { - UNUSED(function_name); - UNUSED(arguments); - } - return std::make_shared(); - } + static DataTypePtr getType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } using Iterator = typename JSONParser::Iterator; static bool addValueToColumn(IColumn & dest, const Iterator &) @@ -325,10 +311,28 @@ public: static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {} }; + template -using JSONHasImpl = JSONCheckImpl; -template -using isValidJSONImpl = JSONCheckImpl; +class IsValidJSONImpl +{ + static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + throw Exception{"Function " + String(function_name) + " needs exactly one argument", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + return std::make_shared(); + } + + using Iterator = typename JSONParser::Iterator; + static bool addValueToColumn(IColumn & dest, const Iterator &) + { + 
JSONHasImpl::addValueToColumn(dest); + } + + static constexpr size_t num_extra_arguments = 0; + static void prepare(const char *, const Block &, const ColumnNumbers &, size_t) {} +}; + template class JSONLengthImpl From b0f2f23a389c3a2c2316d9910fcde98e5a7141a2 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Sun, 20 Oct 2019 10:23:05 +0100 Subject: [PATCH 043/122] Test materialized view pushing extra columns This test exists to prevent unintended changes to existing behaviour. Although this behaviour might not be ideal it is can be exploited for 0-downtime changes to materialized views. Step 1: Add new column to source table. Step 2: Create new view reading source column. Step 3: Swap views using `RENAME TABLE`. Step 4: Add new column to destination table as well. --- ...alized_view_select_extra_columns.reference | 3 ++ ...materialized_view_select_extra_columns.sql | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.reference create mode 100644 dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql diff --git a/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.reference b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.reference new file mode 100644 index 00000000000..4539bbf2d22 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.reference @@ -0,0 +1,3 @@ +0 +1 +2 diff --git a/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql new file mode 100644 index 00000000000..6e928a1ea06 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS mv_extra_columns_dst; +DROP TABLE IF EXISTS mv_extra_columns_src; +DROP TABLE IF EXISTS mv_extra_columns_view; + 
+CREATE TABLE mv_extra_columns_dst ( + v UInt64 +) ENGINE = MergeTree() + PARTITION BY tuple() + ORDER BY v; + +CREATE TABLE mv_extra_columns_src ( + v1 UInt64, + v2 UInt64 +) ENGINE = Null; + +-- Extra columns are ignored when pushing to destination table. +-- This test exists to prevent unintended changes to existing behaviour. +-- +-- Although this behaviour might not be ideal it is can be exploited for 0-downtime changes to materialized views. +-- Step 1: Add new column to source table. Step 2: Create new view reading source column. +-- Step 3: Swap views using `RENAME TABLE`. Step 4: Add new column to destination table as well. +CREATE MATERIALIZED VIEW mv_extra_columns_view TO mv_extra_columns_dst +AS SELECT + v1 as v, + v2 as v2 +FROM mv_extra_columns_src; + +INSERT INTO mv_extra_columns_src VALUES (0, 0), (1, 1), (2, 2); + +SELECT * FROM mv_extra_columns_dst ORDER by v; +SELECT * FROM mv_extra_columns_view; -- { serverError 16 } + +DROP TABLE mv_extra_columns_view; +DROP TABLE mv_extra_columns_src; +DROP TABLE mv_extra_columns_dst; \ No newline at end of file From 61125e536bf2eaa8581d406ad4190a1431a3be6d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 20 Oct 2019 17:59:26 +0300 Subject: [PATCH 044/122] Update 01019_materialized_view_select_extra_columns.sql --- .../01019_materialized_view_select_extra_columns.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql index 6e928a1ea06..10bda34e6b9 100644 --- a/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql +++ b/dbms/tests/queries/0_stateless/01019_materialized_view_select_extra_columns.sql @@ -16,7 +16,7 @@ CREATE TABLE mv_extra_columns_src ( -- Extra columns are ignored when pushing to destination table. -- This test exists to prevent unintended changes to existing behaviour. 
-- --- Although this behaviour might not be ideal it is can be exploited for 0-downtime changes to materialized views. +-- Although this behaviour might not be ideal it can be exploited for 0-downtime changes to materialized views. -- Step 1: Add new column to source table. Step 2: Create new view reading source column. -- Step 3: Swap views using `RENAME TABLE`. Step 4: Add new column to destination table as well. CREATE MATERIALIZED VIEW mv_extra_columns_view TO mv_extra_columns_dst @@ -32,4 +32,4 @@ SELECT * FROM mv_extra_columns_view; -- { serverError 16 } DROP TABLE mv_extra_columns_view; DROP TABLE mv_extra_columns_src; -DROP TABLE mv_extra_columns_dst; \ No newline at end of file +DROP TABLE mv_extra_columns_dst; From e080324fd0b3ab96a51a12acd8ecdf1142ffbbbf Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 21 Oct 2019 12:13:33 +0300 Subject: [PATCH 045/122] Fixed NULL-values in nullable columns through ODBC-bridge --- dbms/programs/odbc-bridge/ColumnInfoHandler.cpp | 17 ++++++++++++++--- .../integration/test_odbc_interaction/test.py | 14 ++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp index 2b46a6cd49f..b188c0bea88 100644 --- a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -154,9 +155,19 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques SQLSMALLINT type = 0; /// TODO Why 301? SQLCHAR column_name[301]; - /// TODO Result is not checked. 
- POCO_SQL_ODBC_CLASS::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, nullptr); - columns.emplace_back(reinterpret_cast(column_name), getDataType(type)); + + SQLSMALLINT nullable; + const auto result = POCO_SQL_ODBC_CLASS::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, &nullable); + if (POCO_SQL_ODBC_CLASS::Utility::isError(result)) + throw POCO_SQL_ODBC_CLASS::StatementException(hstmt); + + auto column_type = getDataType(type); + if (nullable == SQL_NULLABLE) + { + column_type = std::make_shared(column_type); + } + + columns.emplace_back(reinterpret_cast(column_name), std::move(column_type)); } WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index d65745aa496..d8e9cc3fb1a 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -18,6 +18,7 @@ create_table_sql_template = """ `name` varchar(50) NOT NULL, `age` int NOT NULL default 0, `money` int NOT NULL default 0, + `column_x` int default NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB; """ def get_mysql_conn(): @@ -86,12 +87,21 @@ def test_mysql_simple_select_works(started_cluster): conn = get_mysql_conn() create_mysql_table(conn, table_name) + # Check that NULL-values are handled correctly by the ODBC-bridge + with conn.cursor() as cursor: + cursor.execute("INSERT INTO clickhouse.{} VALUES(50, 'null-guy', 127, 255, NULL), (100, 'non-null-guy', 127, 255, 511);".format(table_name)) + conn.commit() + assert node1.query("SELECT column_x FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name)) == '\\N\n511\n' + node1.query(''' -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql1:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name 
String, age UInt32, money UInt32, column_x Nullable(UInt32)) ENGINE = MySQL('mysql1:3306', 'clickhouse', '{}', 'root', 'clickhouse'); '''.format(table_name, table_name)) - node1.query("INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(table_name)) + node1.query("INSERT INTO {}(id, name, money, column_x) select number, concat('name_', toString(number)), 3, NULL from numbers(49) ".format(table_name)) + node1.query("INSERT INTO {}(id, name, money, column_x) select number, concat('name_', toString(number)), 3, 42 from numbers(51, 49) ".format(table_name)) + assert node1.query("SELECT COUNT () FROM {} WHERE column_x IS NOT NULL".format(table_name)) == '50\n' + assert node1.query("SELECT COUNT () FROM {} WHERE column_x IS NULL".format(table_name)) == '50\n' assert node1.query("SELECT count(*) FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name)) == '100\n' # previously this test fails with segfault From 5ef7376cbf7a74437f3d316793a2dd45e1c31046 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 21 Oct 2019 14:22:22 +0300 Subject: [PATCH 046/122] fix distributed join on --- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 549b1c0c1e6..df0946f098a 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -266,8 +266,7 @@ void RestoreQualifiedNamesData::visit(ASTIdentifier & identifier, ASTPtr & ast) { if (IdentifierSemantic::getColumnName(identifier)) { - auto opt_match = IdentifierSemantic::getMembership(identifier); - if (opt_match && *opt_match) + if (IdentifierSemantic::getMembership(identifier)) { ast = identifier.clone(); ast->as()->restoreCompoundName(); From 0d708569e376e9d9f977a4212fc86fd68f317b11 Mon Sep 17 00:00:00 2001 
From: chertus Date: Mon, 21 Oct 2019 14:31:16 +0300 Subject: [PATCH 047/122] fix test result (disabled push down) --- .../00849_multiple_comma_join.reference | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 65ec7f67718..453458a6ecf 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ SELECT a\nFROM t1_00849\nCROSS JOIN t2_00849 -SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a -SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n 
t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN 
\n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n a AS `--t2_00849.a`, \n b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER 
JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT a\nFROM t1_00849\nALL INNER JOIN t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a +SELECT a\nFROM t1_00849\nALL INNER JOIN t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n)\nALL INNER JOIN t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN t3_00849 ON `--t1_00849.b` = 
`--t3_00849.b`\n)\nALL INNER JOIN t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n)\nALL INNER JOIN t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n ALL INNER JOIN t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nALL INNER JOIN t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS 
`--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n)\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nCROSS JOIN t4_00849 SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n )\n CROSS JOIN t3_00849\n)\nCROSS JOIN t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N From 28710b7e4259977b2cf5b846fad39f2bc3175c3f Mon Sep 17 00:00:00 2001 From: akonyaev Date: Mon, 21 Oct 2019 15:46:47 +0300 Subject: [PATCH 048/122] unexpected behaviour for avg and quantile functions over empty decimal column without group by --- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 4 ++++ dbms/src/AggregateFunctions/ReservoirSampler.h | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h 
b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 840075511ea..a9fd955b509 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -27,6 +27,10 @@ struct AggregateFunctionAvgData template ResultT NO_SANITIZE_UNDEFINED result() const { + if constexpr (std::is_floating_point_v) + if constexpr (std::numeric_limits::is_iec559) + return static_cast(sum) / count; /// allow division by zero + if (count == 0) return 0; return static_cast(sum) / count; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index 30d72709ac2..ee0b02c3cda 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -108,9 +108,12 @@ public: */ double quantileInterpolated(double level) { - if (samples.empty()) + if (samples.empty()) { + if (DB::IsDecimalNumber) { + return 0; + } return onEmpty(); - + } sortIfNeeded(); double index = std::max(0., std::min(samples.size() - 1., level * (samples.size() - 1))); From c70cd3dd17f38ae4b5cbf90815f2e23ac5b708b5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 21 Oct 2019 15:47:27 +0300 Subject: [PATCH 049/122] Auto version update to [19.16.1.1532] [54427] --- dbms/cmake/version.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 8dcdcf64a7a..6d94b921111 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -3,9 +3,9 @@ set(VERSION_REVISION 54427) set(VERSION_MAJOR 19) set(VERSION_MINOR 16) set(VERSION_PATCH 1) -set(VERSION_GITHASH 38f65a6a2120d2e76bcf71131068f41195149dfc) -set(VERSION_DESCRIBE v19.16.1.1-prestable) -set(VERSION_STRING 19.16.1.1) +set(VERSION_GITHASH 5286d0afb285a5fbf3d320af3daa6de6b1841374) +set(VERSION_DESCRIBE v19.16.1.1532-prestable) +set(VERSION_STRING 19.16.1.1532) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") From 
7381489d5d703d5a813df440be75875b1593fa99 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 21 Oct 2019 15:48:00 +0300 Subject: [PATCH 050/122] Auto version update to [19.17.1.1] [54428] --- dbms/cmake/version.cmake | 8 ++++---- .../StorageSystemContributors.generated.cpp | 15 +++++++++++++++ debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 6d94b921111..a544274826e 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54427) +set(VERSION_REVISION 54428) set(VERSION_MAJOR 19) -set(VERSION_MINOR 16) +set(VERSION_MINOR 17) set(VERSION_PATCH 1) set(VERSION_GITHASH 5286d0afb285a5fbf3d320af3daa6de6b1841374) -set(VERSION_DESCRIBE v19.16.1.1532-prestable) -set(VERSION_STRING 19.16.1.1532) +set(VERSION_DESCRIBE v19.17.1.1-prestable) +set(VERSION_STRING 19.17.1.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index 3822b648842..6261ab1c2b5 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -5,6 +5,7 @@ const char * auto_contributors[] { "Akazz", "Alberto", "Aleksandra (Ася)", + "Aleksey Akulovich", "Alex Bocharov", "Alex Krash", "Alex Ryndin", @@ -90,7 +91,9 @@ const char * auto_contributors[] { "Daniel Bershatsky", "Daniel Dao", "Danila Kutenin", + "Dario", "DarkWanderer", + "Darío", "Denis Burlaka", "Denis Zhuravlev", "Derek Perkins", @@ -103,6 +106,7 @@ const char * auto_contributors[] { "Dmitry S..ky / skype: dvska-at-skype", "Doge", "Eldar Zaitov", + "Elena Baskakova", "Elghazal Ahmed", "Emmanuel Donin de Rosière", 
"Eric", @@ -114,6 +118,7 @@ const char * auto_contributors[] { "Evgeny Konkov", "Fabian Stäber", "Fadi Hadzh", + "FawnD2", "FeehanG", "Flowyi", "Francisco Barón", @@ -124,6 +129,7 @@ const char * auto_contributors[] { "George3d6", "Gleb Kanterov", "Gleb Novikov", + "Gleb-Tretyakov", "Guillaume Tassery", "Hamoon", "Hasnat", @@ -140,6 +146,7 @@ const char * auto_contributors[] { "Ilya Breev", "Ilya Khomutov", "Ilya Korolev", + "Ilya Kovalenko", "Ilya Shipitsin", "Ilya Skrypitsa", "Ivan", @@ -208,6 +215,7 @@ const char * auto_contributors[] { "Mikhail", "Mikhail Fandyushin", "Mikhail Filimonov", + "Mikhail Korotov", "Mikhail Salosin", "Mikhail Surin", "Mikhail f. Shiryaev", @@ -246,6 +254,7 @@ const char * auto_contributors[] { "Pavlo Bashynskiy", "Pawel Rog", "Persiyanov Dmitriy Andreevich", + "Philippe Ombredanne", "Quid37", "Rafael David Tinoco", "Ramazan Polat", @@ -296,6 +305,7 @@ const char * auto_contributors[] { "Vasily Nemkov", "Vasily Okunev", "Vasily Vasilkov", + "Vasilyev Nikita", "Veloman Yunkan", "Veniamin Gvozdikov", "Victor Tarnavsky", @@ -308,6 +318,7 @@ const char * auto_contributors[] { "Vlad Arkhipov", "Vladimir Chebotarev", "Vladimir Kolobaev", + "Vladimir Kopysov", "Vladimir Kozbin", "Vladimir Smirnov", "Vladislav Rassokhin", @@ -336,6 +347,7 @@ const char * auto_contributors[] { "akuzm", "alesapin", "alex-zaitsev", + "alex.lvxin", "alexander kozhikhov", "alexey-milovidov", "andrewsg", @@ -355,6 +367,7 @@ const char * auto_contributors[] { "chenxing-xc", "chenxing.xc", "chertus", + "comunodi", "coraxster", "daoready", "dasmfm", @@ -449,6 +462,8 @@ const char * auto_contributors[] { "urgordeadbeef", "velom", "vicdashkov", + "vinity", + "vzakaznikov", "zamulla", "zhang2014", "Šimon Podlipský", diff --git a/debian/changelog b/debian/changelog index 131741b202f..b61349fc41a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.16.1.1) unstable; urgency=low +clickhouse (19.17.1.1) unstable; urgency=low * Modified source 
code - -- clickhouse-release Tue, 24 Sep 2019 14:47:28 +0300 + -- clickhouse-release Mon, 21 Oct 2019 15:47:56 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 3134686b0c0..38558228893 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.16.1.* +ARG version=19.17.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 4b5420a3e5a..1bd22f1c95b 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.16.1.* +ARG version=19.17.1.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 189e76d6c6b..fd03a7e0b56 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.16.1.* +ARG version=19.17.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From f6bb52637d25b46665c55109fe368d11c5b6ae73 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Mon, 21 Oct 2019 16:05:32 +0300 Subject: [PATCH 051/122] return for empty avg result over decimal 0 in ResultT type --- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 2 +- dbms/src/AggregateFunctions/ReservoirSampler.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index a9fd955b509..6700adf6b20 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -32,7 +32,7 @@ struct AggregateFunctionAvgData return static_cast(sum) / count; /// allow division by zero 
if (count == 0) - return 0; + return static_cast(0); return static_cast(sum) / count; } }; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index ee0b02c3cda..c0b35408b71 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -110,7 +110,7 @@ public: { if (samples.empty()) { if (DB::IsDecimalNumber) { - return 0; + return static_cast(0); } return onEmpty(); } From a864447802ec451bed6f68af8af66031d48c9ab5 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 20 Aug 2019 20:38:56 +0300 Subject: [PATCH 052/122] Introduce String Hash Map. It speeds up aggregation over short string keys. Use it as a default aggregation method for string keys. --- dbms/src/Common/HashTable/HashTable.h | 6 + dbms/src/Common/HashTable/StringHashMap.h | 180 ++++++++ dbms/src/Common/HashTable/StringHashTable.h | 407 ++++++++++++++++++ .../Common/HashTable/TwoLevelStringHashMap.h | 37 ++ .../HashTable/TwoLevelStringHashTable.h | 217 ++++++++++ dbms/src/Interpreters/Aggregator.h | 77 +++- dbms/src/Interpreters/tests/CMakeLists.txt | 6 + .../Interpreters/tests/string_hash_map.cpp | 246 +++++++++++ 8 files changed, 1172 insertions(+), 4 deletions(-) create mode 100644 dbms/src/Common/HashTable/StringHashMap.h create mode 100644 dbms/src/Common/HashTable/StringHashTable.h create mode 100644 dbms/src/Common/HashTable/TwoLevelStringHashMap.h create mode 100644 dbms/src/Common/HashTable/TwoLevelStringHashTable.h create mode 100644 dbms/src/Interpreters/tests/string_hash_map.cpp diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index f13d6f6e3dd..398b4b594da 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -358,6 +358,12 @@ protected: template friend class TwoLevelHashTable; + template + friend class TwoLevelStringHashTable; + + template + friend class StringHashTable; + using HashValue = 
size_t; using Self = HashTable; using cell_type = Cell; diff --git a/dbms/src/Common/HashTable/StringHashMap.h b/dbms/src/Common/HashTable/StringHashMap.h new file mode 100644 index 00000000000..4fcc46eee24 --- /dev/null +++ b/dbms/src/Common/HashTable/StringHashMap.h @@ -0,0 +1,180 @@ +#pragma once + +#include +#include +#include + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; +}; + +template +auto lookupResultGetMapped(StringHashMapCell * cell) { return &cell->getSecond(); } + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method + static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; } + void setZero() { this->value.first.low = 0; } +}; + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + // Assuming String does not contain zero bytes. 
NOTE: Cannot be used in serialized method + static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; } + void setZero() { this->value.first.a = 0; } +}; + +template +struct StringHashMapCell : public HashMapCellWithSavedHash +{ + using Base = HashMapCellWithSavedHash; + using Base::Base; + static constexpr bool need_zero_value_storage = false; +}; + +template +struct StringHashMapSubMaps +{ + using T0 = StringHashTableEmpty>; + using T1 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using T2 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using T3 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using Ts = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; +}; + +template +class StringHashMap : public StringHashTable> +{ +public: + using Base = StringHashTable>; + using Self = StringHashMap; + using Key = StringRef; + using key_type = StringRef; + using mapped_type = TMapped; + using value_type = typename Base::Ts::value_type; + using LookupResult = mapped_type *; + + using Base::Base; + + /// Merge every cell's value of current map into the destination map. + /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). + /// Each filled cell in current map will invoke func once. If that map doesn't + /// have a key equals to the given cell, a new cell gets emplaced into that map, + /// and func is invoked with the third argument emplaced set to true. Otherwise + /// emplaced is set to false. 
+ template + void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) + { + if (this->m0.hasZero()) + { + const bool emplace_new_zero = !that.m0.hasZero(); + if (emplace_new_zero) + { + that.m0.setHasZero(); + } + + func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), + emplace_new_zero); + } + + this->m1.mergeToViaEmplace(that.m1, func); + this->m2.mergeToViaEmplace(that.m2, func); + this->m3.mergeToViaEmplace(that.m3, func); + this->ms.mergeToViaEmplace(that.ms, func); + } + + /// Merge every cell's value of current map into the destination map via find. + /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). + /// Each filled cell in current map will invoke func once. If that map doesn't + /// have a key equals to the given cell, func is invoked with the third argument + /// exist set to false. Otherwise exist is set to true. + template + void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) + { + if (this->m0.hasZero()) + { + if (that.m0.hasZero()) + { + func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true); + } + else + { + func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false); + } + } + + this->m1.mergeToViaFind(that.m1, func); + this->m2.mergeToViaFind(that.m2, func); + this->m3.mergeToViaFind(that.m3, func); + this->ms.mergeToViaFind(that.ms, func); + } + + mapped_type & ALWAYS_INLINE operator[](Key x) + { + bool inserted; + LookupResult it = nullptr; + emplace(x, it, inserted); + if (inserted) + new (it) mapped_type(); + return *it; + } + + template + void ALWAYS_INLINE forEachValue(Func && func) + { + if (this->m0.size()) + { + func(StringRef{}, this->m0.zeroValue()->getSecond()); + } + + for (auto & v : this->m1) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + + for (auto & v : this->m2) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + + for (auto & v : this->m3) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + 
+ for (auto & v : this->ms) + { + func(v.getFirst(), v.getSecond()); + } + } + + template + void ALWAYS_INLINE forEachMapped(Func && func) + { + if (this->m0.size()) + func(this->m0.zeroValue()->getSecond()); + for (auto & v : this->m1) + func(v.getSecond()); + for (auto & v : this->m2) + func(v.getSecond()); + for (auto & v : this->m3) + func(v.getSecond()); + for (auto & v : this->ms) + func(v.getSecond()); + } +}; diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h new file mode 100644 index 00000000000..e8df4ec0fa3 --- /dev/null +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -0,0 +1,407 @@ +#pragma once + +#include +#include + +#define CASE_1_8 \ + case 1: \ + case 2: \ + case 3: \ + case 4: \ + case 5: \ + case 6: \ + case 7: \ + case 8 + +#define CASE_9_16 \ + case 9: \ + case 10: \ + case 11: \ + case 12: \ + case 13: \ + case 14: \ + case 15: \ + case 16 + +#define CASE_17_24 \ + case 17: \ + case 18: \ + case 19: \ + case 20: \ + case 21: \ + case 22: \ + case 23: \ + case 24 + +struct StringKey0 +{ +}; + +using StringKey8 = UInt64; +using StringKey16 = DB::UInt128; +struct StringKey24 +{ + UInt64 a; + UInt64 b; + UInt64 c; + + bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } + bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); } + bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; } + bool operator!=(const UInt64 rhs) const { return !operator==(rhs); } + + StringKey24 & operator=(const UInt64 rhs) + { + a = rhs; + b = 0; + c = 0; + return *this; + } +}; + +inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) +{ + return {reinterpret_cast(&n), 8ul - (__builtin_clzll(n) >> 3)}; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n) +{ + return {reinterpret_cast(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) +{ + 
return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; +} +inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s) +{ + return s; +} + +struct StringHashTableHash +{ +#if defined(__SSE4_2__) + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key); + return res; + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.low); + res = _mm_crc32_u64(res, key.high); + return res; + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.a); + res = _mm_crc32_u64(res, key.b); + res = _mm_crc32_u64(res, key.c); + return res; + } +#else + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); + } +#endif + size_t ALWAYS_INLINE operator()(StringRef key) const + { + return StringRefHash()(key); + } +}; + +template +struct StringHashTableEmpty +{ + using Self = StringHashTableEmpty; + + bool has_zero = false; + std::aligned_storage_t zero_value_storage; /// Storage of element with zero key. 
+ +public: + bool hasZero() const { return has_zero; } + + void setHasZero() + { + has_zero = true; + new (zeroValue()) Cell(); + } + + void setHasZero(const Cell & other) + { + has_zero = true; + new (zeroValue()) Cell(other); + } + + void clearHasZero() + { + has_zero = false; + if (!std::is_trivially_destructible_v) + zeroValue()->~Cell(); + } + + Cell * zeroValue() { return reinterpret_cast(&zero_value_storage); } + const Cell * zeroValue() const { return reinterpret_cast(&zero_value_storage); } + + using LookupResult = Cell *; + using ConstLookupResult = const Cell *; + + template + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */) + { + if (!hasZero()) + { + setHasZero(); + inserted = true; + } + else + inserted = false; + it = zeroValue(); + } + + template + LookupResult ALWAYS_INLINE find(Key, size_t /* hash */) + { + return hasZero() ? zeroValue() : nullptr; + } + + + void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } + void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } + void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } + void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); } + size_t size() const { return hasZero() ? 
1 : 0; } + bool empty() const { return !hasZero(); } + size_t getBufferSizeInBytes() const { return sizeof(Cell); } + size_t getCollisions() const { return 0; } +}; + +template +struct StringHashTableGrower : public HashTableGrower +{ + // Smooth growing for string maps + void increaseSize() { this->size_degree += 1; } +}; + +template +class StringHashTable : private boost::noncopyable +{ +protected: + static constexpr size_t NUM_MAPS = 5; + // Map for storing empty string + using T0 = typename SubMaps::T0; + + // Short strings are stored as numbers + using T1 = typename SubMaps::T1; + using T2 = typename SubMaps::T2; + using T3 = typename SubMaps::T3; + + // Long strings are stored as StringRef along with saved hash + using Ts = typename SubMaps::Ts; + using Self = StringHashTable; + + template + friend class TwoLevelStringHashTable; + + T0 m0; + T1 m1; + T2 m2; + T3 m3; + Ts ms; + +public: + using Key = StringRef; + using key_type = Key; + using value_type = typename Ts::value_type; + using LookupResult = typename Ts::mapped_type *; + + StringHashTable() {} + + StringHashTable(size_t reserve_for_num_elements) + : m1{reserve_for_num_elements / 4} + , m2{reserve_for_num_elements / 4} + , m3{reserve_for_num_elements / 4} + , ms{reserve_for_num_elements / 4} + { + } + + StringHashTable(StringHashTable && rhs) { *this = std::move(rhs); } + ~StringHashTable() {} + +public: + // Dispatch is written in a way that maximizes the performance: + // 1. Always memcpy 8 times bytes + // 2. Use switch case extension to generate fast dispatching table + // 3. Combine hash computation along with key loading + // 4. 
Funcs are named callables that can be force_inlined + // NOTE: It relies on Little Endianness and SSE4.2 + template + decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + { + static constexpr StringKey0 key0{}; + const StringRef & x = keyHolderGetKey(key_holder); + size_t sz = x.size; + const char * p = x.data; + // pending bits that needs to be shifted out + char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + StringHashTableHash hash; + switch (sz) + { + case 0: + keyHolderDiscardKey(key_holder); + return func(m0, key0, 0); + CASE_1_8 : { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + keyHolderDiscardKey(key_holder); + return func(m1, k8, hash(k8)); + } + CASE_9_16 : { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + keyHolderDiscardKey(key_holder); + return func(m2, k16, hash(k16)); + } + CASE_17_24 : { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + keyHolderDiscardKey(key_holder); + return func(m3, k24, hash(k24)); + } + default: { + return func(ms, std::forward(key_holder), hash(x)); + } + } + } + + struct EmplaceCallable + { + LookupResult & mapped; + bool & inserted; + + EmplaceCallable(LookupResult & mapped_, bool & inserted_) + : mapped(mapped_), inserted(inserted_) {} + + template + void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + { + typename Map::LookupResult result; + map.emplace(key_holder, result, inserted, hash); + mapped = lookupResultGetMapped(result); + } + }; + + template + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + this->dispatch(key_holder, EmplaceCallable(it, inserted)); + } + + struct FindCallable + { + 
template + LookupResult ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + { + return lookupResultGetMapped(map.find(keyHolderGetKey(key_holder), hash)); + } + }; + + LookupResult ALWAYS_INLINE find(Key x) + { + return dispatch(x, FindCallable{}); + } + + void write(DB::WriteBuffer & wb) const + { + m0.write(wb); + m1.write(wb); + m2.write(wb); + m3.write(wb); + ms.write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + m0.writeText(wb); + DB::writeChar(',', wb); + m1.writeText(wb); + DB::writeChar(',', wb); + m2.writeText(wb); + DB::writeChar(',', wb); + m3.writeText(wb); + DB::writeChar(',', wb); + ms.writeText(wb); + } + + void read(DB::ReadBuffer & rb) + { + m0.read(rb); + m1.read(rb); + m2.read(rb); + m3.read(rb); + ms.read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + m0.readText(rb); + DB::assertChar(',', rb); + m1.readText(rb); + DB::assertChar(',', rb); + m2.readText(rb); + DB::assertChar(',', rb); + m3.readText(rb); + DB::assertChar(',', rb); + ms.readText(rb); + } + + size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); } + + bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); } + + size_t getBufferSizeInBytes() const + { + return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes() + + ms.getBufferSizeInBytes(); + } + + void clearAndShrink() + { + m1.clearHasZero(); + m1.clearAndShrink(); + m2.clearAndShrink(); + m3.clearAndShrink(); + ms.clearAndShrink(); + } +}; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashMap.h b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h new file mode 100644 index 00000000000..29bc4b394a7 --- /dev/null +++ b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +template typename ImplTable = StringHashMap> +class TwoLevelStringHashMap : public TwoLevelStringHashTable, ImplTable> +{ 
+public: + using Key = StringRef; + using key_type = Key; + using Self = TwoLevelStringHashMap; + using Base = TwoLevelStringHashTable, StringHashMap>; + using Base::Base; + using typename Base::Impl; + using mapped_type = TMapped; + using value_type = typename Base::value_type; + + using LookupResult = typename Base::LookupResult; + + template + void ALWAYS_INLINE forEachMapped(Func && func) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) + return this->impls[i].forEachMapped(func); + } + + mapped_type & ALWAYS_INLINE operator[](Key x) + { + bool inserted; + LookupResult it; + emplace(x, it, inserted); + if (inserted) + new (lookupResultGetMapped(it)) mapped_type(); + return *lookupResultGetMapped(it); + } +}; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h new file mode 100644 index 00000000000..ed1e1b01857 --- /dev/null +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -0,0 +1,217 @@ +#pragma once + +#include + +template , size_t BITS_FOR_BUCKET = 8> +class TwoLevelStringHashTable : private boost::noncopyable +{ +protected: + using HashValue = size_t; + using Self = TwoLevelStringHashTable; + +public: + using Key = StringRef; + using Impl = ImplTable; + + static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + + // TODO: currently hashing contains redundant computations when doing distributed or external aggregations + size_t hash(const Key & x) const + { + return const_cast(*this).dispatch(x, + [&](const auto &, const auto &, size_t hash) { return hash; }); + } + + size_t operator()(const Key & x) const { return hash(x); } + + /// NOTE Bad for hash tables with more than 2^32 cells. 
+ static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } + +public: + using key_type = typename Impl::key_type; + using value_type = typename Impl::value_type; + using LookupResult = typename Impl::LookupResult; + + Impl impls[NUM_BUCKETS]; + + TwoLevelStringHashTable() {} + + template + TwoLevelStringHashTable(const Source & src) + { + if (src.m0.hasZero()) + impls[0].m0.setHasZero(*src.m0.zeroValue()); + + for (auto & v : src.m1) + { + size_t hash_value = v.getHash(src.m1); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m1.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m2) + { + size_t hash_value = v.getHash(src.m2); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m2.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m3) + { + size_t hash_value = v.getHash(src.m3); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m3.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.ms) + { + size_t hash_value = v.getHash(src.ms); + size_t buck = getBucketFromHash(hash_value); + impls[buck].ms.insertUniqueNonZero(&v, hash_value); + } + } + + // Dispatch is written in a way that maximizes the performance: + // 1. Always memcpy 8 times bytes + // 2. Use switch case extension to generate fast dispatching table + // 3. Combine hash computation along with bucket computation and key loading + // 4. 
Funcs are named callables that can be force_inlined + // NOTE: It relies on Little Endianness and SSE4.2 + template + decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + { + static constexpr StringKey0 key0{}; + const StringRef & x = keyHolderGetKey(key_holder); + size_t sz = x.size; + const char * p = x.data; + // pending bits that needs to be shifted out + char s = (-sz & 7) * 8; + size_t res = -1ULL; + size_t buck; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + StringHashTableHash hash; + switch (sz) + { + case 0: + keyHolderDiscardKey(key_holder); + return func(impls[0].m0, key0, 0); + CASE_1_8 : { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + res = hash(k8); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m1, k8, res); + } + CASE_9_16 : { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + res = hash(k16); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m2, k16, res); + } + CASE_17_24 : { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + res = hash(k24); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m3, k24, res); + } + default: { + res = hash(x); + buck = getBucketFromHash(res); + return func(impls[buck].ms, std::forward(key_holder), res); + } + } + } + + template + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted}); + } + + LookupResult ALWAYS_INLINE find(Key x) + { + return dispatch(x, typename Impl::FindCallable{}); + } + + void write(DB::WriteBuffer & wb) const + { + for 
(size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::writeChar(',', wb); + impls[i].writeText(wb); + } + } + + void read(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::assertChar(',', rb); + impls[i].readText(rb); + } + } + + size_t size() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].size(); + + return res; + } + + bool empty() const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + if (!impls[i].empty()) + return false; + + return true; + } + + size_t getBufferSizeInBytes() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].getBufferSizeInBytes(); + + return res; + } +}; diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index a1369e2fa44..0f0faaecb44 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -11,6 +11,9 @@ #include #include #include +#include +#include + #include #include #include @@ -69,12 +72,20 @@ using AggregatedDataWithUInt8Key = FixedHashMap; using AggregatedDataWithUInt16Key = FixedHashMap; using AggregatedDataWithUInt64Key = HashMap>; + +using AggregatedDataWithShortStringKey = StringHashMap; + using AggregatedDataWithStringKey = HashMapWithSavedHash; + using AggregatedDataWithKeys128 = HashMap; using AggregatedDataWithKeys256 = HashMap; using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; + +using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; + using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; + using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap; using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap; @@ -139,6 +150,8 @@ struct 
AggregationDataWithNullKeyTwoLevel : public Base template using HashTableWithNullKey = AggregationDataWithNullKey>; +template +using StringHashTableWithNullKey = AggregationDataWithNullKey>; using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey; using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey; @@ -149,6 +162,10 @@ using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey, TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + +using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelStringHashMap>; + using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< TwoLevelHashMapWithSavedHash, TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; @@ -216,6 +233,32 @@ struct AggregationMethodString }; +/// Same as above but without cache +template +struct AggregationMethodStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + + AggregationMethodStringNoCache() {} + + template + AggregationMethodStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodString; + + static const bool low_cardinality_optimization = false; + + static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &) + { + key_columns[0]->insertData(key.data, key.size); + } +}; + + /// For the case where there is one fixed-length string key. 
template struct AggregationMethodFixedString @@ -241,6 +284,32 @@ struct AggregationMethodFixedString } }; +/// Same as above but without cache +template +struct AggregationMethodFixedStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + + AggregationMethodFixedStringNoCache() {} + + template + AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodFixedString; + + static const bool low_cardinality_optimization = false; + + static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &) + { + key_columns[0]->insertData(key.data, key.size); + } +}; + + /// Single low cardinality column. template struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod @@ -434,16 +503,16 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> key32; std::unique_ptr> key64; - std::unique_ptr> key_string; - std::unique_ptr> key_fixed_string; + std::unique_ptr> key_string; + std::unique_ptr> key_fixed_string; std::unique_ptr> keys128; std::unique_ptr> keys256; std::unique_ptr> serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; - std::unique_ptr> key_string_two_level; - std::unique_ptr> key_fixed_string_two_level; + std::unique_ptr> key_string_two_level; + std::unique_ptr> key_fixed_string_two_level; std::unique_ptr> keys128_two_level; std::unique_ptr> keys256_two_level; std::unique_ptr> serialized_two_level; diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index e272525c7b7..da45c1a5153 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -37,6 +37,12 @@ add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) 
target_link_libraries (hash_map_string_small PRIVATE dbms) +add_executable (string_hash_map string_hash_map.cpp) +target_link_libraries (string_hash_map PRIVATE dbms) + +add_executable (string_hash_map_aggregation string_hash_map.cpp) +target_link_libraries (string_hash_map_aggregation PRIVATE dbms) + add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) target_link_libraries (two_level_hash_map PRIVATE dbms) diff --git a/dbms/src/Interpreters/tests/string_hash_map.cpp b/dbms/src/Interpreters/tests/string_hash_map.cpp new file mode 100644 index 00000000000..b16e1a91aa5 --- /dev/null +++ b/dbms/src/Interpreters/tests/string_hash_map.cpp @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + +#include +#include + +using namespace std; + +int main() +{ + std::string s; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0, 25); + std::binomial_distribution binomial1(100, 0.01); + std::binomial_distribution binomial2(100, 0.02); + std::binomial_distribution binomial4(100, 0.04); + std::binomial_distribution binomial8(100, 0.08); + std::binomial_distribution binomial16(100, 0.16); + std::binomial_distribution binomial24(100, 0.24); + std::binomial_distribution binomial48(100, 0.48); + // 11GB + std::ofstream f("/tmp/terms.csv"); + size_t l1, l2, l4, l8, l16, l24, l48; + for (auto n = 0ul; n < 1e8; ++n) + { + l1 = binomial1(rng) + 1; + l2 = binomial2(rng) + l1 + 1; + l4 = binomial4(rng) + l2 + 1; + l8 = binomial8(rng) + l4 + 1; + l16 = binomial16(rng) + l8 + 1; + l24 = binomial24(rng) + l16 + 1; + l48 = binomial48(rng) + l24 + 1; + s.resize(l48); + for (auto i = 0ul; i < l48 - 1; ++i) + s[i] = 'a' + dist(rng); + s[l1 - 1] = ','; + s[l2 - 1] = ','; + s[l4 - 1] = ','; + s[l8 - 1] = ','; + s[l16 - 1] = ','; + s[l24 - 1] = ','; + s[l48 - 
1] = '\n'; + f << s; + } + f.close(); + return 0; +} + +create table terms (term1 String, term2 String, term4 String, term8 String, term16 String, term24 String, term48 String) engine TinyLog; +insert into terms select * from file('/tmp/terms.csv', CSV, 'a String, b String, c String, d String, e String, f String, g String'); + +NOTE: for reliable test results, try isolating cpu cores and do python -m perf tune. Also bind numa nodes if any. +# isolate cpu 18 +dir=/home/amos/git/chorigin/data/data/default/terms +for file in term1 term2 term4 term8 term16 term24 term48; do + for size in 30000000 50000000 80000000 100000000; do + BEST_METHOD=0 + BEST_RESULT=0 + for method in {1..2}; do + echo -ne $file $size $method '' + numactl --membind=0 taskset -c 18 ./string_hash_map $size $method <"$dir"/"$file".bin 2>&1 | perl -nE 'say /([0-9\.]+) elem/g if /HashMap/' | tee /tmp/string_hash_map_res + CUR_RESULT=$(cat /tmp/string_hash_map_res | tr -d '.') + if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then + BEST_METHOD=$method + BEST_RESULT=$CUR_RESULT + fi + done + echo Best: $BEST_METHOD - $BEST_RESULT + done +done + +--------------------------- + +term1 30000000 1 68785770.85 term2 30000000 1 42531788.83 term4 30000000 1 14759901.41 term8 30000000 1 8072903.47 +term1 30000000 2 40812128.16 term2 30000000 2 21352402.10 term4 30000000 2 9008907.80 term8 30000000 2 5822641.82 +Best: 1 - 6878577085 Best: 1 - 4253178883 Best: 1 - 1475990141 Best: 1 - 807290347 +term1 50000000 1 68027542.41 term2 50000000 1 40493742.80 term4 50000000 1 16827650.85 term8 50000000 1 7405230.14 +term1 50000000 2 37589806.02 term2 50000000 2 19362975.09 term4 50000000 2 8278094.11 term8 50000000 2 5106810.80 +Best: 1 - 6802754241 Best: 1 - 4049374280 Best: 1 - 1682765085 Best: 1 - 740523014 +term1 80000000 1 68651875.88 term2 80000000 1 38253695.50 term4 80000000 1 15847177.93 term8 80000000 1 7536319.25 +term1 80000000 2 38092189.20 term2 80000000 2 20287003.01 term4 80000000 2 9322770.34 term8 80000000 2 
4355572.15 +Best: 1 - 6865187588 Best: 1 - 3825369550 Best: 1 - 1584717793 Best: 1 - 753631925 +term1 100000000 1 68641941.59 term2 100000000 1 39120834.79 term4 100000000 1 16773904.90 term8 100000000 1 7147146.55 +term1 100000000 2 38358006.72 term2 100000000 2 20629363.17 term4 100000000 2 9665201.92 term8 100000000 2 4728255.07 +Best: 1 - 6864194159 Best: 1 - 3912083479 Best: 1 - 1677390490 Best: 1 - 714714655 + + +term16 30000000 1 6823029.35 term24 30000000 1 5706271.14 term48 30000000 1 4695716.47 +term16 30000000 2 5672283.33 term24 30000000 2 5498855.56 term48 30000000 2 4860537.26 +Best: 1 - 682302935 Best: 1 - 570627114 Best: 2 - 486053726 +term16 50000000 1 6214581.25 term24 50000000 1 5249785.66 term48 50000000 1 4282606.12 +term16 50000000 2 4990361.44 term24 50000000 2 4855552.24 term48 50000000 2 4348923.29 +Best: 1 - 621458125 Best: 1 - 524978566 Best: 2 - 434892329 +term16 80000000 1 5382855.70 term24 80000000 1 4580133.04 term48 80000000 1 3779436.15 +term16 80000000 2 4282192.79 term24 80000000 2 4178791.09 term48 80000000 2 3788409.72 +Best: 1 - 538285570 Best: 1 - 458013304 Best: 2 - 378840972 +term16 100000000 1 5930103.42 term24 100000000 1 5030621.52 term48 100000000 1 4084666.73 +term16 100000000 2 4621719.60 term24 100000000 2 4499866.83 term48 100000000 2 4067029.31 +Best: 1 - 593010342 Best: 1 - 503062152 Best: 1 - 408466673 + +*/ + + +using Value = uint64_t; + +template +void NO_INLINE bench(const std::vector & data, DB::Arena &, const char * name) +{ + // warm up + /* + { + Map map; + typename Map::LookupResult it; + bool inserted; + + for (size_t i = 0, size = data.size(); i < size; ++i) + { + auto key_holder = DB::ArenaKeyHolder{data[i], pool}; + map.emplace(key_holder, it, inserted); + if (inserted) + it->getSecond() = 0; + ++it->getSecond(); + } + } + */ + + std::cerr << "method " << name << std::endl; + for (auto t = 0ul; t < 7; ++t) + { + DB::Arena pool(128 * 1024 * 1024); + Stopwatch watch; + Map map; + typename 
Map::LookupResult it; + bool inserted; + + for (size_t i = 0, size = data.size(); i < size; ++i) + { + map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted); + if (inserted) + *lookupResultGetMapped(it) = 0; + ++*lookupResultGetMapped(it); + } + watch.stop(); + + std::cerr << "arena-memory " << pool.size() + map.getBufferSizeInBytes() << std::endl; + std::cerr << "single-run " << std::setprecision(3) + << watch.elapsedSeconds() << std::endl; + } +} + +/* +template +runFromFile() +{ + DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); + DB::CompressedReadBuffer in2(in1); + + Map map; + DB::Arena pool(128 * 1024 * 1024); + for (size_t i = 0; i < n && !in2.eof(); ++i) + { + auto key = DB::readStringBinaryInto(pool, in2); + + bool inserted; + Map::LookupResult mapped; + map.emplaceKeyHolder(DB::SerializedKeyHolder(key, pool), mapped, inserted); + } +} + +template +benchFromFile() +{ + double best_time = -1.; + for (auto t = 0ul; t < 50; ++t) + { + Stopwatch watch; + runFromFile(); + watch.stop(); + + if (best_time < 0 || best_time > watch.elapsedSeconds()) + { + best_time = watch.elapsedSeconds(); + } + } + + std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). 
Elapsed: " << best_time << " (" << data.size() / best_time + << " elem/sec.)" << std::endl; +} +*/ + + +int main(int argc, char ** argv) +{ + if (argc < 3) + { + std::cerr << "Usage: program n m\n"; + return 1; + } + + size_t n = atoi(argv[1]); + size_t m = atoi(argv[2]); + + DB::Arena pool(128 * 1024 * 1024); + std::vector data(n); + + std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl; + + { + Stopwatch watch; + DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); + DB::CompressedReadBuffer in2(in1); + + std::string tmp; + for (size_t i = 0; i < n && !in2.eof(); ++i) + { + DB::readStringBinary(tmp, in2); + data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size()); + } + + watch.stop(); + std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " (" + << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; + } + + if (!m || m == 1) + bench>(data, pool, "StringHashMap"); + if (!m || m == 2) + bench>(data, pool, "HashMapWithSavedHash"); + if (!m || m == 3) + bench>(data, pool, "HashMap"); + return 0; +} From a0ca932035d5ec7b177efe6f285a7fb1378b7b73 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov <45142681+vitlibar@users.noreply.github.com> Date: Mon, 21 Oct 2019 16:39:55 +0300 Subject: [PATCH 053/122] Fix build and add comments. --- dbms/src/Functions/FunctionsJSON.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index ca9f9d59136..62eceb7f521 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -315,18 +315,26 @@ public: template class IsValidJSONImpl { +public: static DataTypePtr getType(const char * function_name, const ColumnsWithTypeAndName & arguments) { if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. 
throw Exception{"Function " + String(function_name) + " needs exactly one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + } return std::make_shared(); } using Iterator = typename JSONParser::Iterator; static bool addValueToColumn(IColumn & dest, const Iterator &) { - JSONHasImpl::addValueToColumn(dest); + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; } static constexpr size_t num_extra_arguments = 0; From e4a936cf4166acc90e3815b8c370a3b1a913aebb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 16:54:23 +0300 Subject: [PATCH 054/122] Fix add and reload --- dbms/src/Databases/DatabaseOnDisk.cpp | 3 +- .../ExternalDictionariesLoader.cpp | 15 +++----- .../Interpreters/ExternalDictionariesLoader.h | 8 ++-- dbms/src/Interpreters/ExternalLoader.cpp | 37 ++++++++++++------- dbms/src/Interpreters/ExternalLoader.h | 7 ++-- 5 files changed, 37 insertions(+), 33 deletions(-) diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index bb87b18f810..44c73bc51b5 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -302,7 +302,8 @@ void DatabaseOnDisk::createDictionary( /// Load dictionary bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); String dict_name = database.getDatabaseName() + "." 
+ dictionary_name; - context.getExternalDictionariesLoader().reloadSingleDictionary(dict_name, database.getDatabaseName(), query->as(), !lazy_load, !lazy_load); + context.getExternalDictionariesLoader().addDictionaryWithConfig( + dict_name, database.getDatabaseName(), query->as(), !lazy_load); /// If it was ATTACH query and file with table metadata already exist /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp index 422b41eb336..601e42c56e4 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.cpp @@ -29,17 +29,14 @@ void ExternalDictionariesLoader::addConfigRepository( } -void ExternalDictionariesLoader::reloadSingleDictionary( - const String & name, - const String & repo_name, - const ASTCreateQuery & query, - bool load_never_loading, bool sync) const +void ExternalDictionariesLoader::addDictionaryWithConfig( + const String & dictionary_name, const String & repo_name, const ASTCreateQuery & query, bool load_never_loading) const { - return ExternalLoader::reloadWithConfig( - name, /// names are equal - name, + ExternalLoader::addObjectAndLoad( + dictionary_name, /// names are equal + dictionary_name, repo_name, getDictionaryConfigurationFromAST(query), - "dictionary", load_never_loading, sync); + "dictionary", load_never_loading); } } diff --git a/dbms/src/Interpreters/ExternalDictionariesLoader.h b/dbms/src/Interpreters/ExternalDictionariesLoader.h index f596f602dea..ae2ffc8bcc8 100644 --- a/dbms/src/Interpreters/ExternalDictionariesLoader.h +++ b/dbms/src/Interpreters/ExternalDictionariesLoader.h @@ -36,14 +36,12 @@ public: const std::string & repository_name, std::unique_ptr config_repository); - /// Starts reloading of a specified object. 
- void reloadSingleDictionary( - const String & name, + void addDictionaryWithConfig( + const String & dictionary_name, const String & repo_name, const ASTCreateQuery & query, - bool load_never_loading = false, - bool sync = false) const; + bool load_never_loading = false) const; protected: diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index f06d891a928..fb5c1b0ed07 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -70,7 +70,7 @@ public: return collectConfigs(); } - ObjectConfigsPtr updateLoadableInfo( + ObjectConfig updateLoadableInfo( const String & external_name, const String & object_name, const String & repo_name, @@ -102,7 +102,7 @@ public: loadable_info.configs.emplace_back(external_name, object_config); loadable_info.last_update_time = Poco::Timestamp{}; /// now loadable_info.in_use = true; - return collectConfigs(); + return object_config; } private: @@ -360,6 +360,12 @@ public: event.notify_all(); } + void setSingleObjectConfigurationWithoutLoading(const String & external_name, const ObjectConfig & config) + { + std::lock_guard lock{mutex}; + infos.emplace(external_name, Info{config}); + } + /// Sets whether all the objects from the configuration should be always loaded (even if they aren't used). void enableAlwaysLoadEverything(bool enable) { @@ -499,7 +505,7 @@ public: void load(LoadResults & loaded_results, Duration timeout = NO_TIMEOUT) { load(allNames, loaded_results, timeout); } /// Starts reloading a specified object. 
- void reload(const String & name, bool load_never_loading = false, bool sync = false) + void reload(const String & name, bool load_never_loading = false) { std::lock_guard lock{mutex}; Info * info = getInfo(name); @@ -512,7 +518,7 @@ public: { cancelLoading(*info); info->forced_to_reload = true; - startLoading(name, *info, sync); + startLoading(name, *info); } } @@ -735,7 +741,7 @@ private: event.wait_for(lock, timeout, pred); } - void startLoading(const String & name, Info & info, bool sync = false) + void startLoading(const String & name, Info & info) { if (info.loading()) return; @@ -746,7 +752,7 @@ private: info.loading_start_time = std::chrono::system_clock::now(); info.loading_end_time = TimePoint{}; - if (enable_async_loading && !sync) + if (enable_async_loading) { /// Put a job to the thread pool for the loading. auto thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, name, loading_id, true}; @@ -1130,11 +1136,11 @@ void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const return loading_dispatcher->load(loaded_objects, timeout); } -void ExternalLoader::reload(const String & name, bool load_never_loading, bool sync) const +void ExternalLoader::reload(const String & name, bool load_never_loading) const { auto configs = config_files_reader->read(); loading_dispatcher->setConfiguration(configs); - loading_dispatcher->reload(name, load_never_loading, sync); + loading_dispatcher->reload(name, load_never_loading); } void ExternalLoader::reload(bool load_never_loading) const @@ -1143,18 +1149,21 @@ void ExternalLoader::reload(bool load_never_loading) const loading_dispatcher->reload(load_never_loading); } -void ExternalLoader::reloadWithConfig( +void ExternalLoader::addObjectAndLoad( const String & name, const String & external_name, const String & repo_name, const Poco::AutoPtr & config, const String & key, - bool load_never_loading, - bool sync) const + bool load_never_loading) const { - loading_dispatcher->setConfiguration( 
- config_files_reader->updateLoadableInfo(external_name, name, repo_name, config, key)); - loading_dispatcher->reload(name, load_never_loading, sync); + auto object_config = config_files_reader->updateLoadableInfo(external_name, name, repo_name, config, key); + loading_dispatcher->setSingleObjectConfigurationWithoutLoading(external_name, object_config); + LoadablePtr loaded_object; + if (load_never_loading) + loading_dispatcher->loadStrict(name, loaded_object); + else + loading_dispatcher->load(name, loaded_object, Duration::zero()); } diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 5a41072f04a..1ae5efdb4b4 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -148,7 +148,7 @@ public: /// `load_never_loading` specifies what to do if the object has never been loading before. /// The function can either skip it (false) or load for the first time (true). /// Also function can load dictionary synchronously - void reload(const String & name, bool load_never_loading = false, bool sync = false) const; + void reload(const String & name, bool load_never_loading = false) const; /// Starts reloading of all the objects. 
@@ -160,14 +160,13 @@ protected: virtual LoadablePtr create(const String & name, const Poco::Util::AbstractConfiguration & config, const String & key_in_config) const = 0; /// Reload object with already parsed configuration - void reloadWithConfig( + void addObjectAndLoad( const String & name, /// name of dictionary const String & external_name, /// name of source (example xml-file, may contain more than dictionary) const String & repo_name, /// name of repository (database name, or all xml files) const Poco::AutoPtr & config, const String & key_in_config, /// key where we can start search of loadables (, , etc) - bool load_never_loading = false, - bool sync = false) const; + bool load_never_loading = false) const; private: struct ObjectConfig; From 17c2f301e22d4f40faca4d9559b7924b55bb70f3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 17:00:18 +0300 Subject: [PATCH 055/122] Remove accident cerr --- dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 4 +++- dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 09e3abc1ac2..94642ba7924 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -361,9 +361,11 @@ void buildSourceConfiguration(AutoPtr doc, AutoPtr root, cons buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as()); } +/** Check all AST fields are filled, throws exception + * in other case + */ void checkAST(const ASTCreateQuery & query) { - std::cerr << queryToString(query) << std::endl; if (!query.is_dictionary || query.dictionary == nullptr) throw Exception("Cannot convert dictionary to configuration from non-dictionary AST.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); diff --git 
a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h index fdc17d42678..bb48765c492 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -9,6 +9,7 @@ using DictionaryConfigurationPtr = Poco::AutoPtr Date: Mon, 21 Oct 2019 17:07:47 +0300 Subject: [PATCH 056/122] Better naming --- dbms/src/Interpreters/ExternalLoader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index fb5c1b0ed07..5c5e0d44d47 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -322,10 +322,10 @@ public: else { const auto & new_config = new_config_it->second; - if (!isSameConfiguration(*info.config.config, info.config.key_in_config, *new_config.config, new_config.key_in_config)) + if (!isSameConfiguration(*info.object_config.config, info.object_config.key_in_config, *new_config.config, new_config.key_in_config)) { /// Configuration has been changed. 
- info.config = new_config; + info.object_config = new_config; info.config_changed = true; if (info.wasLoading()) @@ -610,7 +610,7 @@ public: private: struct Info { - Info(const ObjectConfig & config_) : config(config_) {} + Info(const ObjectConfig & object_config_) : object_config(object_config_) {} bool loaded() const { return object != nullptr; } bool failed() const { return !object && exception; } @@ -642,12 +642,12 @@ private: result.exception = exception; result.loading_start_time = loading_start_time; result.loading_duration = loadingDuration(); - result.origin = config.config_path; - result.repository_name = config.repository_name; + result.origin = object_config.config_path; + result.repository_name = object_config.repository_name; return result; } - ObjectConfig config; + ObjectConfig object_config; LoadablePtr object; TimePoint loading_start_time; TimePoint loading_end_time; @@ -797,7 +797,7 @@ private: if (!info || !info->loading() || (info->loading_id != loading_id)) return; - ObjectConfig config = info->config; + ObjectConfig config = info->object_config; bool config_changed = info->config_changed; LoadablePtr previous_version = info->object; size_t error_count = info->error_count; From f7043c38bde1eec67efd678835e9257acec7b79b Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 17:20:42 +0300 Subject: [PATCH 057/122] Less diff --- dbms/src/Databases/IDatabase.h | 2 +- dbms/src/Dictionaries/IDictionary.h | 4 ++-- dbms/src/Dictionaries/IDictionary_fwd.h | 14 -------------- dbms/src/Interpreters/ExternalLoader.cpp | 1 - .../Interpreters/IExternalLoaderConfigRepository.h | 1 - dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 -- 6 files changed, 3 insertions(+), 21 deletions(-) delete mode 100644 dbms/src/Dictionaries/IDictionary_fwd.h diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index b4985dc066b..65bcc62bfdb 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -3,7 +3,7 @@ 
#include #include #include -#include +#include #include #include diff --git a/dbms/src/Dictionaries/IDictionary.h b/dbms/src/Dictionaries/IDictionary.h index def5d58e243..9ce7c569f75 100644 --- a/dbms/src/Dictionaries/IDictionary.h +++ b/dbms/src/Dictionaries/IDictionary.h @@ -16,7 +16,7 @@ namespace DB { struct IDictionaryBase; -using DictionaryPtr = std::shared_ptr; +using DictionaryPtr = std::unique_ptr; struct DictionaryStructure; class ColumnString; @@ -57,7 +57,7 @@ struct IDictionaryBase : public IExternalLoadable virtual std::exception_ptr getLastException() const { return {}; } - DictionaryPtr shared_from_this() + std::shared_ptr shared_from_this() { return std::static_pointer_cast(IExternalLoadable::shared_from_this()); } diff --git a/dbms/src/Dictionaries/IDictionary_fwd.h b/dbms/src/Dictionaries/IDictionary_fwd.h deleted file mode 100644 index 864dad217b6..00000000000 --- a/dbms/src/Dictionaries/IDictionary_fwd.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -struct IDictionaryBase; -using DictionaryPtr = std::shared_ptr; - -} diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5c5e0d44d47..5f0ccc9ae90 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -761,7 +761,6 @@ private: else { /// Perform the loading immediately. 
- /// Deadlock when we try to load dictionary from dictionary on localhost doLoading(name, loading_id, false); } } diff --git a/dbms/src/Interpreters/IExternalLoaderConfigRepository.h b/dbms/src/Interpreters/IExternalLoaderConfigRepository.h index efb91815555..93cefe0a0d4 100644 --- a/dbms/src/Interpreters/IExternalLoaderConfigRepository.h +++ b/dbms/src/Interpreters/IExternalLoaderConfigRepository.h @@ -13,7 +13,6 @@ namespace DB using LoadablesConfigurationPtr = Poco::AutoPtr; - /// Base interface for configurations source for Loadble objects, which can be /// loaded with ExternalLoader. Configurations may came from filesystem (XML-files), /// server memory (from database), etc. It's important that main result of this class diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 61a2f18d8e1..68701e22e8b 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -43,8 +43,6 @@ #include #include -#include - #include #include From 307f711500589f5b2727027e5108d878c42767c3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 17:25:21 +0300 Subject: [PATCH 058/122] Fix db mysql dictionaries iterator --- dbms/src/Databases/DatabaseMySQL.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 938d7c4ec77..bc023b51282 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -33,7 +34,7 @@ public: DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context &, const FilterByNameFunction & = {}) override { - return nullptr; + return std::make_unique(); } ASTPtr getCreateDatabaseQuery(const Context & context) const override; From d865aeba9be20b1077a7c1d3bccb891494ebaa98 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 21 Oct 2019 
17:54:53 +0300 Subject: [PATCH 059/122] Review fixes. --- dbms/src/Common/HashTable/StringHashTable.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index e8df4ec0fa3..7ea17acb316 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -330,10 +330,13 @@ public: struct FindCallable { - template - LookupResult ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + // find() doesn't need any key memory management, so we don't work with + // any key holders here, only with normal keys. The key type is still + // different for every subtable, this is why it is a template parameter. + template + LookupResult ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) { - return lookupResultGetMapped(map.find(keyHolderGetKey(key_holder), hash)); + return lookupResultGetMapped(map.find(key, hash)); } }; From ff7850cfbd8071dea13bb69ffda7260ad8cf9782 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 17:59:35 +0300 Subject: [PATCH 060/122] Remove strange code in favor of less strange code --- dbms/programs/server/Server.cpp | 8 ++++++-- dbms/src/Interpreters/Context.cpp | 16 +--------------- .../ExternalLoaderDatabaseConfigRepository.h | 4 ++++ dbms/src/Interpreters/ExternalModelsLoader.cpp | 8 +++++--- dbms/src/Interpreters/ExternalModelsLoader.h | 8 +++++--- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index a9a088f80dd..e274dca6bdc 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -922,8 +923,11 @@ int Server::main(const std::vector & /*args*/) global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true); } - auto config_repository = 
std::make_unique(config(), "dictionaries_config"); - global_context->getExternalDictionariesLoader().addConfigRepository("", std::move(config_repository)); + auto dictionaries_repository = std::make_unique(config(), "dictionaries_config"); + global_context->getExternalDictionariesLoader().addConfigRepository("", std::move(dictionaries_repository)); + + auto models_repository = std::make_unique(config(), "models_config"); + global_context->getExternalModelsLoader().addConfigRepository("", std::move(models_repository)); } catch (...) { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 57971e446c9..3658b476c8f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1315,12 +1315,6 @@ EmbeddedDictionaries & Context::getEmbeddedDictionaries() const ExternalDictionariesLoader & Context::getExternalDictionariesLoader() const { - { - std::lock_guard lock(shared->external_dictionaries_mutex); - if (shared->external_dictionaries_loader) - return *shared->external_dictionaries_loader; - } - std::lock_guard lock(shared->external_dictionaries_mutex); if (!shared->external_dictionaries_loader) { @@ -1340,21 +1334,13 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader() const ExternalModelsLoader & Context::getExternalModelsLoader() const { - { - std::lock_guard lock(shared->external_models_mutex); - if (shared->external_models_loader) - return *shared->external_models_loader; - } - - const auto & config = getConfigRef(); std::lock_guard lock(shared->external_models_mutex); if (!shared->external_models_loader) { if (!this->global_context) throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); - auto config_repository = std::make_unique(config, "models_config"); - shared->external_models_loader.emplace(std::move(config_repository), *this->global_context); + shared->external_models_loader.emplace(*this->global_context); } return 
*shared->external_models_loader; } diff --git a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h index 4f48f882f82..343ed8cf038 100644 --- a/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h +++ b/dbms/src/Interpreters/ExternalLoaderDatabaseConfigRepository.h @@ -6,6 +6,9 @@ namespace DB { + +/// Repository from database, which stores dictionary definitions on disk. +/// Tracks update time and existance of .sql files through IDatabase. class ExternalLoaderDatabaseConfigRepository : public IExternalLoaderConfigRepository { public: @@ -27,4 +30,5 @@ private: DatabasePtr database; Context context; }; + } diff --git a/dbms/src/Interpreters/ExternalModelsLoader.cpp b/dbms/src/Interpreters/ExternalModelsLoader.cpp index 690b7d920ca..6bdf8341906 100644 --- a/dbms/src/Interpreters/ExternalModelsLoader.cpp +++ b/dbms/src/Interpreters/ExternalModelsLoader.cpp @@ -10,12 +10,10 @@ namespace ErrorCodes } -ExternalModelsLoader::ExternalModelsLoader( - ExternalLoaderConfigRepositoryPtr config_repository, Context & context_) +ExternalModelsLoader::ExternalModelsLoader(Context & context_) : ExternalLoader("external model", &Logger::get("ExternalModelsLoader")) , context(context_) { - addConfigRepository("_XMLConfigRepository", std::move(config_repository), {"model", "name"}); enablePeriodicUpdates(true); } @@ -40,4 +38,8 @@ std::shared_ptr ExternalModelsLoader::create( } } +void ExternalModelsLoader::addConfigRepository(const String & name, std::unique_ptr config_repository) +{ + ExternalLoader::addConfigRepository(name, std::move(config_repository), {"models", "name"}); +} } diff --git a/dbms/src/Interpreters/ExternalModelsLoader.h b/dbms/src/Interpreters/ExternalModelsLoader.h index fa860d08b2b..0e95828bd77 100644 --- a/dbms/src/Interpreters/ExternalModelsLoader.h +++ b/dbms/src/Interpreters/ExternalModelsLoader.h @@ -18,15 +18,17 @@ public: using ModelPtr = std::shared_ptr; /// Models 
will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds. - ExternalModelsLoader( - ExternalLoaderConfigRepositoryPtr config_repository, - Context & context_); + ExternalModelsLoader(Context & context_); ModelPtr getModel(const std::string & name) const { return std::static_pointer_cast(getLoadable(name)); } + void addConfigRepository(const String & name, + std::unique_ptr config_repository); + + protected: LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config) const override; From c721a4ee105e9ad9a7fb2330f52afc4c51cb7c93 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 21 Oct 2019 23:13:53 +0800 Subject: [PATCH 061/122] Remove experimental pr-*** rule from labeler --- .github/labeler.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 02d824581c5..72cf714f039 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,7 +1,3 @@ -# Build changes -pr-build: - - "**/CMakeLists.txt" - # Documentation PRs documentation: - "**/*.md" From 10f69eef03cc00651befb6a1251e9f11d7f7e54c Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 19:05:45 +0300 Subject: [PATCH 062/122] Add tests for non-standard dictionaries and fix bugs --- dbms/src/Dictionaries/CacheDictionary.cpp | 2 +- .../ComplexKeyCacheDictionary.cpp | 2 +- .../ComplexKeyHashedDictionary.cpp | 2 +- dbms/src/Dictionaries/DictionaryFactory.cpp | 5 ++- dbms/src/Dictionaries/DictionaryFactory.h | 6 +++- dbms/src/Dictionaries/FlatDictionary.cpp | 2 +- dbms/src/Dictionaries/HashedDictionary.cpp | 4 +-- .../Dictionaries/RangeHashedDictionary.cpp | 2 +- dbms/src/Dictionaries/TrieDictionary.cpp | 2 +- .../getDictionaryConfigurationFromAST.cpp | 5 +-- .../tests/gtest_dictionary_configuration.cpp | 32 +++++++++++++++++-- 11 files changed, 49 insertions(+), 15 deletions(-) diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp
b/dbms/src/Dictionaries/CacheDictionary.cpp index c3a78150f05..57498221141 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -611,7 +611,7 @@ void registerDictionaryCache(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); }; - factory.registerLayout("cache", create_layout); + factory.registerLayout("cache", create_layout, false); } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 3478e631076..b27adc20636 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -415,7 +415,7 @@ void registerDictionaryComplexKeyCache(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); }; - factory.registerLayout("complex_key_cache", create_layout); + factory.registerLayout("complex_key_cache", create_layout, true); } diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 5e08ce3295e..41a5caaa768 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -755,7 +755,7 @@ void registerDictionaryComplexKeyHashed(DictionaryFactory & factory) const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; - factory.registerLayout("complex_key_hashed", create_layout); + factory.registerLayout("complex_key_hashed", create_layout, true); } diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index 
b51974b5ae6..52703ce36d1 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -13,10 +13,13 @@ namespace ErrorCodes extern const int UNKNOWN_ELEMENT_IN_CONFIG; } -void DictionaryFactory::registerLayout(const std::string & layout_type, Creator create_layout) +void DictionaryFactory::registerLayout(const std::string & layout_type, Creator create_layout, bool is_complex) { if (!registered_layouts.emplace(layout_type, std::move(create_layout)).second) throw Exception("DictionaryFactory: the layout name '" + layout_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); + + layout_complexity[layout_type] = is_complex; + } diff --git a/dbms/src/Dictionaries/DictionaryFactory.h b/dbms/src/Dictionaries/DictionaryFactory.h index fd7978f590f..d9efd3f42f1 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.h +++ b/dbms/src/Dictionaries/DictionaryFactory.h @@ -45,11 +45,15 @@ public: const std::string & config_prefix, DictionarySourcePtr source_ptr)>; - void registerLayout(const std::string & layout_type, Creator create_layout); + bool isComplex(const std::string & layout_type) const { return layout_complexity.at(layout_type); } + + void registerLayout(const std::string & layout_type, Creator create_layout, bool is_complex); private: using LayoutRegistry = std::unordered_map; LayoutRegistry registered_layouts; + using LayoutComplexity = std::unordered_map; + LayoutComplexity layout_complexity; }; } diff --git a/dbms/src/Dictionaries/FlatDictionary.cpp b/dbms/src/Dictionaries/FlatDictionary.cpp index d1c6a138c89..68afdd355b8 100644 --- a/dbms/src/Dictionaries/FlatDictionary.cpp +++ b/dbms/src/Dictionaries/FlatDictionary.cpp @@ -724,7 +724,7 @@ void registerDictionaryFlat(DictionaryFactory & factory) const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; - factory.registerLayout("flat", 
create_layout); + factory.registerLayout("flat", create_layout, false); } diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index d81b259b184..1c6fd602ba9 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -787,8 +787,8 @@ void registerDictionaryHashed(DictionaryFactory & factory) const bool sparse = name == "sparse_hashed"; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty, sparse); }; - factory.registerLayout("hashed", create_layout); - factory.registerLayout("sparse_hashed", create_layout); + factory.registerLayout("hashed", create_layout, false); + factory.registerLayout("sparse_hashed", create_layout, false); } } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index 3fb29d747c0..ae67027c210 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -691,7 +691,7 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; - factory.registerLayout("range_hashed", create_layout); + factory.registerLayout("range_hashed", create_layout, false); } } diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index 7a5adee63b6..f7f4f8c33a6 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -767,7 +767,7 @@ void registerDictionaryTrie(DictionaryFactory & factory) // This is specialised trie for storing IPv4 and IPv6 prefixes. 
return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; - factory.registerLayout("ip_trie", create_layout); + factory.registerLayout("ip_trie", create_layout, true); } } diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 94642ba7924..08b523ae10f 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -411,7 +412,7 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer Names pk_columns = getPrimaryKeyColumns(query.dictionary->primary_key); auto dictionary_layout = query.dictionary->layout; - bool complex = startsWith(dictionary_layout->layout_type, "complex"); + bool complex = DictionaryFactory::instance().isComplex(dictionary_layout->layout_type); buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_columns); @@ -422,7 +423,7 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime); if (query.dictionary->range) - buildRangeConfiguration(xml_document, current_dictionary, query.dictionary->range); + buildRangeConfiguration(xml_document, structure_element, query.dictionary->range); conf->load(xml_document); return conf; diff --git a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index b2fbdf70479..7cda09a259a 100644 --- a/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/dbms/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -14,11 +14,13 @@ #include #include #include +#include #include using namespace DB; +static bool registered = false; /// For debug std::string 
configurationToString(const DictionaryConfigurationPtr & config) { @@ -30,6 +32,12 @@ std::string configurationToString(const DictionaryConfigurationPtr & config) TEST(ConvertDictionaryAST, SimpleDictConfiguration) { + if (!registered) + { + registerDictionaries(); + registered = true; + } + String input = " CREATE DICTIONARY test.dict1" " (" " key_column UInt64 DEFAULT 0," @@ -55,8 +63,8 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) EXPECT_EQ(config->getInt("dictionary.lifetime.max"), 10); /// range - EXPECT_EQ(config->getString("dictionary.range_min"), "second_column"); - EXPECT_EQ(config->getString("dictionary.range_max"), "third_column"); + EXPECT_EQ(config->getString("dictionary.structure.range_min"), "second_column"); + EXPECT_EQ(config->getString("dictionary.structure.range_max"), "third_column"); /// source EXPECT_EQ(config->getString("dictionary.source.clickhouse.host"), "localhost"); @@ -70,7 +78,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) Poco::Util::AbstractConfiguration::Keys keys; config->keys("dictionary.structure", keys); - EXPECT_EQ(keys.size(), 3); + EXPECT_EQ(keys.size(), 5); /// + ranged keys EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".name"), "second_column"); EXPECT_EQ(config->getString("dictionary.structure." + keys[0] + ".type"), "UInt8"); EXPECT_EQ(config->getInt("dictionary.structure." 
+ keys[0] + ".null_value"), 1); @@ -89,6 +97,12 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) TEST(ConvertDictionaryAST, TrickyAttributes) { + if (!registered) + { + registerDictionaries(); + registered = true; + } + String input = " CREATE DICTIONARY dict2" " (" " key_column UInt64 IS_OBJECT_ID," @@ -127,6 +141,12 @@ TEST(ConvertDictionaryAST, TrickyAttributes) TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) { + if (!registered) + { + registerDictionaries(); + registered = true; + } + String input = " CREATE DICTIONARY dict4" " (" " key_column1 String," @@ -172,6 +192,12 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) TEST(ConvertDictionaryAST, ComplexSource) { + if (!registered) + { + registerDictionaries(); + registered = true; + } + String input = " CREATE DICTIONARY dict4" " (" " key_column UInt64," From a860ee78911ea2c6efd2a8268108423295be50dc Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 16 Oct 2019 01:15:07 +0800 Subject: [PATCH 063/122] Fix preciseExp10 --- .../queries/0_stateless/00534_exp10.reference | 1311 +++++++++++++---- .../tests/queries/0_stateless/00534_exp10.sql | 3 +- libs/libcommon/src/preciseExp10.c | 82 +- 3 files changed, 1049 insertions(+), 347 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00534_exp10.reference b/dbms/tests/queries/0_stateless/00534_exp10.reference index bba2c07fb64..5f55c6a4420 100644 --- a/dbms/tests/queries/0_stateless/00534_exp10.reference +++ b/dbms/tests/queries/0_stateless/00534_exp10.reference @@ -1,310 +1,1001 @@ -0 1 -1 10 -2 100 -3 1000 -4 10000 -5 100000 -6 1000000 -7 10000000 -8 100000000 -9 1000000000 -10 10000000000 -11 100000000000 -12 1000000000000 -13 10000000000000 -14 100000000000000 -15 1000000000000000 -16 10000000000000000 -17 100000000000000000 -18 1000000000000000000 -19 10000000000000000000 -20 100000000000000000000 -21 1e21 -22 1e22 -23 1e23 -24 1e24 -25 1e25 -26 1e26 -27 1e27 -28 1e28 -29 1e29 -30 1e30 -31 1e31 -32 1e32 -33 1e33 -34 1e34 -35 1e35 
-36 1e36 -37 1e37 -38 1e38 -39 1e39 -40 1e40 -41 1e41 -42 1e42 -43 1e43 -44 1e44 -45 1e45 -46 1e46 -47 1e47 -48 1e48 -49 1e49 -50 1e50 -51 1e51 -52 1e52 -53 1e53 -54 1e54 -55 1e55 -56 1e56 -57 1e57 -58 1e58 -59 1e59 -60 1e60 -61 1e61 -62 1e62 -63 1e63 -64 1e64 -65 1e65 -66 1e66 -67 1e67 -68 1e68 -69 1e69 -70 1e70 -71 1e71 -72 1e72 -73 1e73 -74 1e74 -75 1e75 -76 1e76 -77 1e77 -78 1e78 -79 1e79 -80 1e80 -81 1e81 -82 1e82 -83 1e83 -84 1e84 -85 1e85 -86 1e86 -87 1e87 -88 1e88 -89 1e89 -90 1e90 -91 1e91 -92 1e92 -93 1e93 -94 1e94 -95 1e95 -96 1e96 -97 1e97 -98 1e98 -99 1e99 -100 1e100 -101 1e101 -102 1e102 -103 1e103 -104 1e104 -105 1e105 -106 1e106 -107 1e107 -108 1e108 -109 1e109 -110 1e110 -111 1e111 -112 1e112 -113 1e113 -114 1e114 -115 1e115 -116 1e116 -117 1e117 -118 1e118 -119 1e119 -120 1e120 -121 1e121 -122 1e122 -123 1e123 -124 1e124 -125 1e125 -126 1e126 -127 1e127 -128 1e128 -129 1e129 -130 1e130 -131 1e131 -132 1e132 -133 1e133 -134 1e134 -135 1e135 -136 1e136 -137 1e137 -138 1e138 -139 1e139 -140 1e140 -141 1e141 -142 1e142 -143 1e143 -144 1e144 -145 1e145 -146 1e146 -147 1e147 -148 1e148 -149 1e149 -150 1e150 -151 1e151 -152 1e152 -153 1e153 -154 1e154 -155 1e155 -156 1e156 -157 1e157 -158 1e158 -159 1e159 -160 1e160 -161 1e161 -162 1e162 -163 1e163 -164 1e164 -165 1e165 -166 1e166 -167 1e167 -168 1e168 -169 1e169 -170 1e170 -171 1e171 -172 1e172 -173 1e173 -174 1e174 -175 1e175 -176 1e176 -177 1e177 -178 1e178 -179 1e179 -180 1e180 -181 1e181 -182 1e182 -183 1e183 -184 1e184 -185 1e185 -186 1e186 -187 1e187 -188 1e188 -189 1e189 -190 1e190 -191 1e191 -192 1e192 -193 1e193 -194 1e194 -195 1e195 -196 1e196 -197 1e197 -198 1e198 -199 1e199 -200 1e200 -201 1e201 -202 1e202 -203 1e203 -204 1e204 -205 1e205 -206 1e206 -207 1e207 -208 1e208 -209 1e209 -210 1e210 -211 1e211 -212 1e212 -213 1e213 -214 1e214 -215 1e215 -216 1e216 -217 1e217 -218 1e218 -219 1e219 -220 1e220 -221 1e221 -222 1e222 -223 1e223 -224 1e224 -225 1e225 -226 1e226 -227 1e227 -228 1e228 -229 
1e229 -230 1e230 -231 1e231 -232 1e232 -233 1e233 -234 1e234 -235 1e235 -236 1e236 -237 1e237 -238 1e238 -239 1e239 -240 1e240 -241 1e241 -242 1e242 -243 1e243 -244 1e244 -245 1e245 -246 1e246 -247 1e247 -248 1e248 -249 1e249 -250 1e250 -251 1e251 -252 1e252 -253 1e253 -254 1e254 -255 1e255 -256 1e256 -257 1e257 -258 1e258 -259 1e259 -260 1e260 -261 1e261 -262 1e262 -263 1e263 -264 1e264 -265 1e265 -266 1e266 -267 1e267 -268 1e268 -269 1e269 -270 1e270 -271 1e271 -272 1e272 -273 1e273 -274 1e274 -275 1e275 -276 1e276 -277 1e277 -278 1e278 -279 1e279 -280 1e280 -281 1e281 -282 1e282 -283 1e283 -284 1e284 -285 1e285 -286 1e286 -287 1e287 -288 1e288 -289 1e289 -290 1e290 -291 1e291 -292 1e292 -293 1e293 -294 1e294 -295 1e295 -296 1e296 -297 1e297 -298 1e298 -299 1e299 -300 1e300 -301 1e301 -302 1e302 -303 1e303 -304 1e304 -305 1e305 -306 1e306 -307 1e307 -308 1e308 -309 inf +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +10 0 +11 0 +12 0 +13 0 +14 0 +15 0 +16 0 +17 0 +18 0 +19 0 +20 0 +21 0 +22 0 +23 0 +24 0 +25 0 +26 0 +27 0 +28 0 +29 0 +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +40 0 +41 0 +42 0 +43 0 +44 0 +45 0 +46 0 +47 0 +48 0 +49 0 +50 0 +51 0 +52 0 +53 0 +54 0 +55 0 +56 0 +57 0 +58 0 +59 0 +60 0 +61 0 +62 0 +63 0 +64 0 +65 0 +66 0 +67 0 +68 0 +69 0 +70 0 +71 0 +72 0 +73 0 +74 0 +75 0 +76 0 +77 0 +78 0 +79 0 +80 0 +81 0 +82 0 +83 0 +84 0 +85 0 +86 0 +87 0 +88 0 +89 0 +90 0 +91 0 +92 0 +93 0 +94 0 +95 0 +96 0 +97 0 +98 0 +99 0 +100 0 +101 0 +102 0 +103 0 +104 0 +105 0 +106 0 +107 0 +108 0 +109 0 +110 0 +111 0 +112 0 +113 0 +114 0 +115 0 +116 0 +117 0 +118 0 +119 0 +120 0 +121 0 +122 0 +123 0 +124 0 +125 0 +126 0 +127 0 +128 0 +129 0 +130 0 +131 0 +132 0 +133 0 +134 0 +135 0 +136 0 +137 0 +138 0 +139 0 +140 0 +141 0 +142 0 +143 0 +144 0 +145 0 +146 0 +147 0 +148 0 +149 0 +150 0 +151 0 +152 0 +153 0 +154 0 +155 0 +156 0 +157 0 +158 0 +159 0 +160 0 +161 0 +162 0 +163 0 +164 0 +165 0 +166 0 +167 0 +168 0 +169 0 +170 0 +171 0 +172 0 +173 0 +174 0 
+175 0 +176 0 +177 1e-323 +178 1e-322 +179 1e-321 +180 1e-320 +181 1e-319 +182 1e-318 +183 1e-317 +184 1e-316 +185 1e-315 +186 1e-314 +187 1e-313 +188 1e-312 +189 1e-311 +190 1e-310 +191 1e-309 +192 1e-308 +193 1e-307 +194 1e-306 +195 1e-305 +196 1e-304 +197 1e-303 +198 1e-302 +199 1e-301 +200 1e-300 +201 1e-299 +202 1e-298 +203 1e-297 +204 1e-296 +205 1e-295 +206 1e-294 +207 1e-293 +208 1e-292 +209 1e-291 +210 1e-290 +211 1e-289 +212 1e-288 +213 1e-287 +214 1e-286 +215 1e-285 +216 1e-284 +217 1e-283 +218 1e-282 +219 1e-281 +220 1e-280 +221 1e-279 +222 1e-278 +223 1e-277 +224 1e-276 +225 1e-275 +226 1e-274 +227 1e-273 +228 1e-272 +229 1e-271 +230 1e-270 +231 1e-269 +232 1e-268 +233 1e-267 +234 1e-266 +235 1e-265 +236 1e-264 +237 1e-263 +238 1e-262 +239 1e-261 +240 1e-260 +241 1e-259 +242 1e-258 +243 1e-257 +244 1e-256 +245 1e-255 +246 1e-254 +247 1e-253 +248 1e-252 +249 1e-251 +250 1e-250 +251 1e-249 +252 1e-248 +253 1e-247 +254 1e-246 +255 1e-245 +256 1e-244 +257 1e-243 +258 1e-242 +259 1e-241 +260 1e-240 +261 1e-239 +262 1e-238 +263 1e-237 +264 1e-236 +265 1e-235 +266 1e-234 +267 1e-233 +268 1e-232 +269 1e-231 +270 1e-230 +271 1e-229 +272 1e-228 +273 1e-227 +274 1e-226 +275 1e-225 +276 1e-224 +277 1e-223 +278 1e-222 +279 1e-221 +280 1e-220 +281 1e-219 +282 1e-218 +283 1e-217 +284 1e-216 +285 1e-215 +286 1e-214 +287 1e-213 +288 1e-212 +289 1e-211 +290 1e-210 +291 1e-209 +292 1e-208 +293 1e-207 +294 1e-206 +295 1e-205 +296 1e-204 +297 1e-203 +298 1e-202 +299 1e-201 +300 1e-200 +301 1e-199 +302 1e-198 +303 1e-197 +304 1e-196 +305 1e-195 +306 1e-194 +307 1e-193 +308 1e-192 +309 1e-191 +310 1e-190 +311 1e-189 +312 1e-188 +313 1e-187 +314 1e-186 +315 1e-185 +316 1e-184 +317 1e-183 +318 1e-182 +319 1e-181 +320 1e-180 +321 1e-179 +322 1e-178 +323 1e-177 +324 1e-176 +325 1e-175 +326 1e-174 +327 1e-173 +328 1e-172 +329 1e-171 +330 1e-170 +331 1e-169 +332 1e-168 +333 1e-167 +334 1e-166 +335 1e-165 +336 1e-164 +337 1e-163 +338 1e-162 +339 1e-161 +340 1e-160 +341 1e-159 +342 
1e-158 +343 1e-157 +344 1e-156 +345 1e-155 +346 1e-154 +347 1e-153 +348 1e-152 +349 1e-151 +350 1e-150 +351 1e-149 +352 1e-148 +353 1e-147 +354 1e-146 +355 1e-145 +356 1e-144 +357 1e-143 +358 1e-142 +359 1e-141 +360 1e-140 +361 1e-139 +362 1e-138 +363 1e-137 +364 1e-136 +365 1e-135 +366 1e-134 +367 1e-133 +368 1e-132 +369 1e-131 +370 1e-130 +371 1e-129 +372 1e-128 +373 1e-127 +374 1e-126 +375 1e-125 +376 1e-124 +377 1e-123 +378 1e-122 +379 1e-121 +380 1e-120 +381 1e-119 +382 1e-118 +383 1e-117 +384 1e-116 +385 1e-115 +386 1e-114 +387 1e-113 +388 1e-112 +389 1e-111 +390 1e-110 +391 1e-109 +392 1e-108 +393 1e-107 +394 1e-106 +395 1e-105 +396 1e-104 +397 1e-103 +398 1e-102 +399 1e-101 +400 1e-100 +401 1e-99 +402 1e-98 +403 1e-97 +404 1e-96 +405 1e-95 +406 1e-94 +407 1e-93 +408 1e-92 +409 1e-91 +410 1e-90 +411 1e-89 +412 1e-88 +413 1e-87 +414 1e-86 +415 1e-85 +416 1e-84 +417 1e-83 +418 1e-82 +419 1e-81 +420 1e-80 +421 1e-79 +422 1e-78 +423 1e-77 +424 1e-76 +425 1e-75 +426 1e-74 +427 1e-73 +428 1e-72 +429 1e-71 +430 1e-70 +431 1e-69 +432 1e-68 +433 1e-67 +434 1e-66 +435 1e-65 +436 1e-64 +437 1e-63 +438 1e-62 +439 1e-61 +440 1e-60 +441 1e-59 +442 1e-58 +443 1e-57 +444 1e-56 +445 1e-55 +446 1e-54 +447 1e-53 +448 1e-52 +449 1e-51 +450 1e-50 +451 1e-49 +452 1e-48 +453 1e-47 +454 1e-46 +455 1e-45 +456 1e-44 +457 1e-43 +458 1e-42 +459 1e-41 +460 1e-40 +461 1e-39 +462 1e-38 +463 1e-37 +464 1e-36 +465 1e-35 +466 1e-34 +467 1e-33 +468 1e-32 +469 1e-31 +470 1e-30 +471 1e-29 +472 1e-28 +473 1e-27 +474 1e-26 +475 1e-25 +476 1e-24 +477 1e-23 +478 1e-22 +479 1e-21 +480 1e-20 +481 1e-19 +482 1e-18 +483 1e-17 +484 1e-16 +485 1e-15 +486 1e-14 +487 1e-13 +488 1e-12 +489 1e-11 +490 1e-10 +491 1e-9 +492 1e-8 +493 1e-7 +494 0.000001 +495 0.00001 +496 0.0001 +497 0.001 +498 0.01 +499 0.1 +500 1 +501 10 +502 100 +503 1000 +504 10000 +505 100000 +506 1000000 +507 10000000 +508 100000000 +509 1000000000 +510 10000000000 +511 100000000000 +512 1000000000000 +513 10000000000000 +514 
100000000000000 +515 1000000000000000 +516 10000000000000000 +517 100000000000000000 +518 1000000000000000000 +519 10000000000000000000 +520 100000000000000000000 +521 1e21 +522 1e22 +523 1e23 +524 1e24 +525 1e25 +526 1e26 +527 1e27 +528 1e28 +529 1e29 +530 1e30 +531 1e31 +532 1e32 +533 1e33 +534 1e34 +535 1e35 +536 1e36 +537 1e37 +538 1e38 +539 1e39 +540 1e40 +541 1e41 +542 1e42 +543 1e43 +544 1e44 +545 1e45 +546 1e46 +547 1e47 +548 1e48 +549 1e49 +550 1e50 +551 1e51 +552 1e52 +553 1e53 +554 1e54 +555 1e55 +556 1e56 +557 1e57 +558 1e58 +559 1e59 +560 1e60 +561 1e61 +562 1e62 +563 1e63 +564 1e64 +565 1e65 +566 1e66 +567 1e67 +568 1e68 +569 1e69 +570 1e70 +571 1e71 +572 1e72 +573 1e73 +574 1e74 +575 1e75 +576 1e76 +577 1e77 +578 1e78 +579 1e79 +580 1e80 +581 1e81 +582 1e82 +583 1e83 +584 1e84 +585 1e85 +586 1e86 +587 1e87 +588 1e88 +589 1e89 +590 1e90 +591 1e91 +592 1e92 +593 1e93 +594 1e94 +595 1e95 +596 1e96 +597 1e97 +598 1e98 +599 1e99 +600 1e100 +601 1e101 +602 1e102 +603 1e103 +604 1e104 +605 1e105 +606 1e106 +607 1e107 +608 1e108 +609 1e109 +610 1e110 +611 1e111 +612 1e112 +613 1e113 +614 1e114 +615 1e115 +616 1e116 +617 1e117 +618 1e118 +619 1e119 +620 1e120 +621 1e121 +622 1e122 +623 1e123 +624 1e124 +625 1e125 +626 1e126 +627 1e127 +628 1e128 +629 1e129 +630 1e130 +631 1e131 +632 1e132 +633 1e133 +634 1e134 +635 1e135 +636 1e136 +637 1e137 +638 1e138 +639 1e139 +640 1e140 +641 1e141 +642 1e142 +643 1e143 +644 1e144 +645 1e145 +646 1e146 +647 1e147 +648 1e148 +649 1e149 +650 1e150 +651 1e151 +652 1e152 +653 1e153 +654 1e154 +655 1e155 +656 1e156 +657 1e157 +658 1e158 +659 1e159 +660 1e160 +661 1e161 +662 1e162 +663 1e163 +664 1e164 +665 1e165 +666 1e166 +667 1e167 +668 1e168 +669 1e169 +670 1e170 +671 1e171 +672 1e172 +673 1e173 +674 1e174 +675 1e175 +676 1e176 +677 1e177 +678 1e178 +679 1e179 +680 1e180 +681 1e181 +682 1e182 +683 1e183 +684 1e184 +685 1e185 +686 1e186 +687 1e187 +688 1e188 +689 1e189 +690 1e190 +691 1e191 +692 1e192 +693 1e193 +694 1e194 
+695 1e195 +696 1e196 +697 1e197 +698 1e198 +699 1e199 +700 1e200 +701 1e201 +702 1e202 +703 1e203 +704 1e204 +705 1e205 +706 1e206 +707 1e207 +708 1e208 +709 1e209 +710 1e210 +711 1e211 +712 1e212 +713 1e213 +714 1e214 +715 1e215 +716 1e216 +717 1e217 +718 1e218 +719 1e219 +720 1e220 +721 1e221 +722 1e222 +723 1e223 +724 1e224 +725 1e225 +726 1e226 +727 1e227 +728 1e228 +729 1e229 +730 1e230 +731 1e231 +732 1e232 +733 1e233 +734 1e234 +735 1e235 +736 1e236 +737 1e237 +738 1e238 +739 1e239 +740 1e240 +741 1e241 +742 1e242 +743 1e243 +744 1e244 +745 1e245 +746 1e246 +747 1e247 +748 1e248 +749 1e249 +750 1e250 +751 1e251 +752 1e252 +753 1e253 +754 1e254 +755 1e255 +756 1e256 +757 1e257 +758 1e258 +759 1e259 +760 1e260 +761 1e261 +762 1e262 +763 1e263 +764 1e264 +765 1e265 +766 1e266 +767 1e267 +768 1e268 +769 1e269 +770 1e270 +771 1e271 +772 1e272 +773 1e273 +774 1e274 +775 1e275 +776 1e276 +777 1e277 +778 1e278 +779 1e279 +780 1e280 +781 1e281 +782 1e282 +783 1e283 +784 1e284 +785 1e285 +786 1e286 +787 1e287 +788 1e288 +789 1e289 +790 1e290 +791 1e291 +792 1e292 +793 1e293 +794 1e294 +795 1e295 +796 1e296 +797 1e297 +798 1e298 +799 1e299 +800 1e300 +801 1e301 +802 1e302 +803 1e303 +804 1e304 +805 1e305 +806 1e306 +807 1e307 +808 1e308 +809 inf +810 inf +811 inf +812 inf +813 inf +814 inf +815 inf +816 inf +817 inf +818 inf +819 inf +820 inf +821 inf +822 inf +823 inf +824 inf +825 inf +826 inf +827 inf +828 inf +829 inf +830 inf +831 inf +832 inf +833 inf +834 inf +835 inf +836 inf +837 inf +838 inf +839 inf +840 inf +841 inf +842 inf +843 inf +844 inf +845 inf +846 inf +847 inf +848 inf +849 inf +850 inf +851 inf +852 inf +853 inf +854 inf +855 inf +856 inf +857 inf +858 inf +859 inf +860 inf +861 inf +862 inf +863 inf +864 inf +865 inf +866 inf +867 inf +868 inf +869 inf +870 inf +871 inf +872 inf +873 inf +874 inf +875 inf +876 inf +877 inf +878 inf +879 inf +880 inf +881 inf +882 inf +883 inf +884 inf +885 inf +886 inf +887 inf +888 inf +889 inf +890 inf +891 
inf +892 inf +893 inf +894 inf +895 inf +896 inf +897 inf +898 inf +899 inf +900 inf +901 inf +902 inf +903 inf +904 inf +905 inf +906 inf +907 inf +908 inf +909 inf +910 inf +911 inf +912 inf +913 inf +914 inf +915 inf +916 inf +917 inf +918 inf +919 inf +920 inf +921 inf +922 inf +923 inf +924 inf +925 inf +926 inf +927 inf +928 inf +929 inf +930 inf +931 inf +932 inf +933 inf +934 inf +935 inf +936 inf +937 inf +938 inf +939 inf +940 inf +941 inf +942 inf +943 inf +944 inf +945 inf +946 inf +947 inf +948 inf +949 inf +950 inf +951 inf +952 inf +953 inf +954 inf +955 inf +956 inf +957 inf +958 inf +959 inf +960 inf +961 inf +962 inf +963 inf +964 inf +965 inf +966 inf +967 inf +968 inf +969 inf +970 inf +971 inf +972 inf +973 inf +974 inf +975 inf +976 inf +977 inf +978 inf +979 inf +980 inf +981 inf +982 inf +983 inf +984 inf +985 inf +986 inf +987 inf +988 inf +989 inf +990 inf +991 inf +992 inf +993 inf +994 inf +995 inf +996 inf +997 inf +998 inf +999 inf +nan diff --git a/dbms/tests/queries/0_stateless/00534_exp10.sql b/dbms/tests/queries/0_stateless/00534_exp10.sql index df4186d343c..f2836fe0655 100644 --- a/dbms/tests/queries/0_stateless/00534_exp10.sql +++ b/dbms/tests/queries/0_stateless/00534_exp10.sql @@ -1 +1,2 @@ -SELECT number, exp10(number) FROM system.numbers LIMIT 310; +SELECT number, exp10(number - 500) FROM system.numbers LIMIT 1000; +SELECT exp10(nan); diff --git a/libs/libcommon/src/preciseExp10.c b/libs/libcommon/src/preciseExp10.c index d24a7e60241..49c87217e20 100644 --- a/libs/libcommon/src/preciseExp10.c +++ b/libs/libcommon/src/preciseExp10.c @@ -171,47 +171,57 @@ obstacle to adoption, that text has been removed. #include #include +#include double preciseExp10(double x) { + if (isnan(x)) return NAN; + + // ranging between DBL_TRUE_MIN and DBL_MAX. 
Outsiders are treated as zeros or infinities static const double p10[] - = {1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, - 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, - 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, - 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, - 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62, 1e+63, 1e+64, - 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, - 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, - 1e+97, 1e+98, 1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, - 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, - 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143, 1e+144, - 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, - 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, - 1e+177, 1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, - 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, - 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224, - 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 
1e+238, 1e+239, 1e+240, - 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, - 1e+257, 1e+258, 1e+259, 1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, 1e+272, - 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, - 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, - 1e+305, 1e+306, 1e+307, 1e+308}; + = {1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, + 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, + 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, + 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, + 1e-255, 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, + 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, + 1e-221, 1e-220, 1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, + 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190, 1e-189, 1e-188, + 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171, + 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, + 1e-153, 1e-152, 1e-151, 1e-150, 
1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, + 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, + 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, + 1e-102, 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, + 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, + 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, + 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, + 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, + 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, + 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, + 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, + 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, + 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, 1e+81, 1e+82, 1e+83, 1e+84, + 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, 1e+101, + 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, + 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 
1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, + 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, + 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, + 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, + 1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, + 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, + 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, + 1e+238, 1e+239, 1e+240, 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252, 1e+253, 1e+254, + 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, + 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, + 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, + 1e+306, 1e+307, 1e+308}; double n, y = modf(x, &n); - if (n > 308) - return x > 0 ? INFINITY : -INFINITY; - if (!y) - return p10[(int)n + 15]; + if (n > 308) return INFINITY; + if (n < -323) return 0; - union - { - double f; - uint64_t i; - } u = {n}; - if ((u.i >> 52 & 0x7ff) < 0x3ff + 4) - { - y = exp2(3.32192809488736234787031942948939 * y); - return y * p10[(int)n + 15]; - } - return pow(10.0, x); + // Using lookup table based formula to get accurate results for integer arguments. 
+ return exp2(3.32192809488736234787031942948939 * y) * p10[(int)n + 323]; } From 851b00c99fb6edbdcccd0d836a8759afaa6806bb Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 21 Oct 2019 19:22:54 +0300 Subject: [PATCH 064/122] minor visitors improvement --- .../Interpreters/CrossToInnerJoinVisitor.cpp | 8 ++--- dbms/src/Interpreters/InDepthNodeVisitor.h | 36 ++++++------------- .../JoinToSubqueryTransformVisitor.cpp | 33 ++++++----------- 3 files changed, 25 insertions(+), 52 deletions(-) diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index 52cb05d35ea..94b38b2c991 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -94,7 +94,7 @@ public: , ands_only(true) {} - void visit(const ASTFunction & node, ASTPtr & ast) + void visit(const ASTFunction & node, const ASTPtr & ast) { if (!ands_only) return; @@ -231,8 +231,8 @@ private: } }; -using CheckExpressionMatcher = OneTypeMatcher; -using CheckExpressionVisitor = InDepthNodeVisitor; +using CheckExpressionMatcher = ConstOneTypeMatcher; +using CheckExpressionVisitor = ConstInDepthNodeVisitor; bool getTables(ASTSelectQuery & select, std::vector & joined_tables, size_t & num_comma) @@ -314,7 +314,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da return; CheckExpressionVisitor::Data visitor_data{joined_tables}; - CheckExpressionVisitor(visitor_data).visit(select.refWhere()); + CheckExpressionVisitor(visitor_data).visit(select.where()); if (visitor_data.complex()) return; diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 6ed19da2e94..18b84b11b24 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -10,13 +10,13 @@ namespace DB /// Visits AST tree in depth, call functions for nodes according to Matcher type data. 
/// You need to define Data, visit() and needChildVisit() in Matcher class. -template -class InDepthNodeVisitorTemplate +template +class InDepthNodeVisitor { public: using Data = typename Matcher::Data; - InDepthNodeVisitorTemplate(Data & data_, std::ostream * ostr_ = nullptr) + InDepthNodeVisitor(Data & data_, std::ostream * ostr_ = nullptr) : data(data_), visit_depth(0), ostr(ostr_) @@ -49,42 +49,26 @@ private: }; template -using InDepthNodeVisitor = InDepthNodeVisitorTemplate; - -template -using ConstInDepthNodeVisitor = InDepthNodeVisitorTemplate; +using ConstInDepthNodeVisitor = InDepthNodeVisitor; /// Simple matcher for one node type without complex traversal logic. -template +template class OneTypeMatcher { public: - using Data = _Data; + using Data = Data_; using TypeToVisit = typename Data::TypeToVisit; - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return _visit_children; } + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return visit_children; } - static void visit(ASTPtr & ast, Data & data) + static void visit(T & ast, Data & data) { if (auto * t = typeid_cast(ast.get())) data.visit(*t, ast); } }; -/// Links two simple matches into resulting one. There's no complex traversal logic: all the children would be visited. 
-template -class LinkedMatcher -{ -public: - using Data = std::pair; - - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } - - static void visit(ASTPtr & ast, Data & data) - { - First::visit(ast, data.first); - Second::visit(ast, data.second); - } -}; +template +using ConstOneTypeMatcher = OneTypeMatcher; } diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index b60e6533921..2a10a7a28ae 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -37,8 +37,6 @@ namespace class ExtractAsterisksMatcher { public: - using Visitor = InDepthNodeVisitor; - struct Data { std::unordered_map table_columns; @@ -76,30 +74,16 @@ public: } }; - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return false; } + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return false; } - static void visit(ASTPtr & ast, Data & data) + static void visit(const ASTPtr & ast, Data & data) { - if (auto * t = ast->as()) - visit(*t, ast, data); if (auto * t = ast->as()) visit(*t, ast, data); } private: - static void visit(ASTSelectQuery & node, ASTPtr &, Data & data) - { - if (data.table_columns.empty()) - return; - - Visitor(data).visit(node.refSelect()); - if (!data.new_select_expression_list) - return; - - node.setExpression(ASTSelectQuery::Expression::SELECT, std::move(data.new_select_expression_list)); - } - - static void visit(ASTExpressionList & node, ASTPtr &, Data & data) + static void visit(const ASTExpressionList & node, const ASTPtr &, Data & data) { bool has_asterisks = false; data.new_select_expression_list = std::make_shared(); @@ -375,7 +359,7 @@ using RewriteMatcher = OneTypeMatcher; using RewriteVisitor = InDepthNodeVisitor; using SetSubqueryAliasMatcher = OneTypeMatcher; using SetSubqueryAliasVisitor = InDepthNodeVisitor; -using ExtractAsterisksVisitor = ExtractAsterisksMatcher::Visitor; 
+using ExtractAsterisksVisitor = ConstInDepthNodeVisitor; using ColumnAliasesVisitor = ConstInDepthNodeVisitor; using AppendSemanticMatcher = OneTypeMatcher; using AppendSemanticVisitor = InDepthNodeVisitor; @@ -389,7 +373,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) visit(*t, ast, data); } -void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) +void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data) { using RevertedAliases = AsteriskSemantic::RevertedAliases; @@ -398,7 +382,12 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast return; ExtractAsterisksVisitor::Data asterisks_data(data.context, table_expressions); - ExtractAsterisksVisitor(asterisks_data).visit(ast); + if (!asterisks_data.table_columns.empty()) + { + ExtractAsterisksVisitor(asterisks_data).visit(select.select()); + if (asterisks_data.new_select_expression_list) + select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list)); + } ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, "")); if (select.select()) From 3e66ca6f448aaa5047620ad7f377b9f6e333dee0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 22 Oct 2019 00:40:19 +0800 Subject: [PATCH 065/122] Create CODEOWNERS --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000000..c93accf1009 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,3 @@ +dbms/* @ClickHouse/core-assigner +docs/* @ClickHouse/docs +docs/zh/* @ClickHouse/docs-zh From 83dbfe9634c302cd8e89ec75c5040c7bfaf1773e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Oct 2019 20:09:41 +0300 Subject: [PATCH 066/122] New tests and fixes for them --- dbms/src/Databases/DatabaseDictionary.cpp | 4 +- dbms/src/Databases/DatabaseDictionary.h | 4 +- 
dbms/src/Databases/DatabaseLazy.cpp | 2 +- dbms/src/Databases/DatabaseLazy.h | 4 +- dbms/src/Databases/DatabaseMemory.cpp | 4 +- dbms/src/Databases/DatabaseMemory.h | 5 +- dbms/src/Databases/DatabaseMySQL.h | 2 +- dbms/src/Databases/DatabaseOnDisk.cpp | 1 + dbms/src/Databases/DatabasesCommon.cpp | 8 +- dbms/src/Databases/DatabasesCommon.h | 4 +- dbms/src/Databases/IDatabase.h | 5 +- .../getDictionaryConfigurationFromAST.cpp | 30 +++-- ...018_ddl_dictionaries_bad_queries.reference | 7 + .../01018_ddl_dictionaries_bad_queries.sh | 120 ++++++++++++++++++ .../01018_ddl_dictionaries_create.sql | 2 + .../01018_ddl_dictionaries_select.sql | 4 + .../01018_ddl_dictionaries_special.reference | 11 ++ .../01018_ddl_dictionaries_special.sql | 94 ++++++++++++++ 18 files changed, 283 insertions(+), 28 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference create mode 100755 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference create mode 100644 dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 6e946bf0d79..52dde4efe9b 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -102,13 +102,13 @@ void DatabaseDictionary::removeDictionary( } void DatabaseDictionary::attachDictionary( - const String & /*dictionary_name*/, const Context & /*context*/, bool /*load*/) + const String & /*dictionary_name*/, const Context & /*context*/, bool /*reload*/) { throw Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); } void DatabaseDictionary::detachDictionary( - const String & /*dictionary_name*/, const Context & /*context*/) + const String & /*dictionary_name*/, const Context & /*context*/, bool /*reload*/) { throw 
Exception("Dictionary engine doesn't support dictionaries.", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 7607ee61c0d..9e88b50a68e 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -89,9 +89,9 @@ public: ASTPtr tryGetCreateDictionaryQuery(const Context & context, const String & table_name) const override; - void attachDictionary(const String & dictionary_name, const Context & context, bool load) override; + void attachDictionary(const String & dictionary_name, const Context & context, bool reload) override; - void detachDictionary(const String & dictionary_name, const Context & context) override; + void detachDictionary(const String & dictionary_name, const Context & context, bool reload) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 022509fbcd8..d84ee61aea5 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -129,7 +129,7 @@ void DatabaseLazy::attachDictionary( throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } -void DatabaseLazy::detachDictionary(const String & /*dictionary_name*/, const Context & /*context*/) +void DatabaseLazy::detachDictionary(const String & /*dictionary_name*/, const Context & /*context*/, bool /*reload*/) { throw Exception("Lazy engine can be used only with *Log tables.", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseLazy.h b/dbms/src/Databases/DatabaseLazy.h index 19d0d4cb51f..c268f58945c 100644 --- a/dbms/src/Databases/DatabaseLazy.h +++ b/dbms/src/Databases/DatabaseLazy.h @@ -111,9 +111,9 @@ public: StoragePtr detachTable(const String & table_name) override; - void attachDictionary(const String & dictionary_name, const Context & context, bool load) override; + void attachDictionary(const String & 
dictionary_name, const Context & context, bool reload) override; - void detachDictionary(const String & dictionary_name, const Context & context) override; + void detachDictionary(const String & dictionary_name, const Context & context, bool reload) override; void shutdown() override; diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index ece77c8cd88..0badc9b4df4 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -35,7 +35,7 @@ void DatabaseMemory::createTable( } -void DatabaseMemory::attachDictionary(const String & /*name*/, const Context & /*context*/, bool /*load*/) +void DatabaseMemory::attachDictionary(const String & /*name*/, const Context & /*context*/, bool /*reload*/) { throw Exception("There is no ATTACH DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); } @@ -57,7 +57,7 @@ void DatabaseMemory::removeTable( } -void DatabaseMemory::detachDictionary(const String & /*name*/, const Context & /*context*/) +void DatabaseMemory::detachDictionary(const String & /*name*/, const Context & /*context*/, bool /*reload*/) { throw Exception("There is no DETACH DICTIONARY query for DatabaseMemory", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index a04d01d8aae..45f51a177f7 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -41,7 +41,7 @@ public: void attachDictionary( const String & name, const Context & context, - bool load) override; + bool reload) override; void removeTable( const Context & context, @@ -53,7 +53,8 @@ public: void detachDictionary( const String & name, - const Context & context) override; + const Context & context, + bool reload) override; time_t getObjectMetadataModificationTime(const Context & context, const String & table_name) override; diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h 
index bc023b51282..3ca8722b248 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -64,7 +64,7 @@ public: throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED); } - void detachDictionary(const String &, const Context &) override + void detachDictionary(const String &, const Context &, bool) override { throw Exception("MySQL database engine does not support detach dictionary.", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 44c73bc51b5..76e7511de6c 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -312,6 +312,7 @@ void DatabaseOnDisk::createDictionary( } catch (...) { + database.detachDictionary(dictionary_name, context); Poco::File(dictionary_metadata_tmp_path).remove(); throw; } diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index e1ee1045657..9b3b92ad765 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -161,7 +161,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name) return res; } -void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name, const Context & context) +void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name, const Context & context, bool reload) { { std::lock_guard lock(mutex); @@ -171,7 +171,11 @@ void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name, dictionaries.erase(it); } - context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, true); + if (reload) + { + bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); + context.getExternalDictionariesLoader().reload(getDatabaseName() + "." 
+ dictionary_name, !lazy_load); + } } diff --git a/dbms/src/Databases/DatabasesCommon.h b/dbms/src/Databases/DatabasesCommon.h index 00034dacbce..f07136aa927 100644 --- a/dbms/src/Databases/DatabasesCommon.h +++ b/dbms/src/Databases/DatabasesCommon.h @@ -33,11 +33,11 @@ public: void attachTable(const String & table_name, const StoragePtr & table) override; - void attachDictionary(const String & name, const Context & context, bool load) override; + void attachDictionary(const String & name, const Context & context, bool reload) override; StoragePtr detachTable(const String & table_name) override; - void detachDictionary(const String & name, const Context & context) override; + void detachDictionary(const String & name, const Context & context, bool reload) override; DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 65bcc62bfdb..ac1ccc096d5 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -165,13 +165,14 @@ public: virtual void attachTable(const String & name, const StoragePtr & table) = 0; /// Add dictionary to the database, but do not add it to the metadata. The database may not support this method. - virtual void attachDictionary(const String & name, const Context & context, bool load=true) = 0; + /// load is false when we starting up and lazy_load is true, so we don't want to load dictionaries synchronously. + virtual void attachDictionary(const String & name, const Context & context, bool reload = true) = 0; /// Forget about the table without deleting it, and return it. The database may not support this method. virtual StoragePtr detachTable(const String & name) = 0; /// Forget about the dictionary without deleting it, and return it. The database may not support this method. 
- virtual void detachDictionary(const String & name, const Context & context) = 0; + virtual void detachDictionary(const String & name, const Context & context, bool reload = true) = 0; /// Rename the table and possibly move the table to another database. virtual void renameTable( diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 08b523ae10f..00f86d9de2b 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -281,23 +281,25 @@ void buildPrimaryKeyConfiguration( /** * Transforms list of ASTDictionaryAttributeDeclarations to list of dictionary attributes */ -void buildDictionaryAttributesConfiguration( +std::unordered_set buildDictionaryAttributesConfiguration( AutoPtr doc, AutoPtr root, const ASTExpressionList * dictionary_attributes, const Names & key_columns) { const auto & children = dictionary_attributes->children; + std::unordered_set dictionary_attributes_names; for (size_t i = 0; i < children.size(); ++i) { const ASTDictionaryAttributeDeclaration * dict_attr = children[i]->as(); if (!dict_attr->type) throw Exception("Dictionary attribute must has type", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + dictionary_attributes_names.insert(dict_attr->name); if (std::find(key_columns.begin(), key_columns.end(), dict_attr->name) == key_columns.end()) buildSingleAttribute(doc, root, dict_attr); - } + return dictionary_attributes_names; } /** Transform function with key-value arguments to configuration @@ -371,23 +373,30 @@ void checkAST(const ASTCreateQuery & query) throw Exception("Cannot convert dictionary to configuration from non-dictionary AST.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); if (query.dictionary_attributes_list == nullptr || query.dictionary_attributes_list->children.empty()) - throw Exception("Dictionary AST missing attributes list.", 
ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + throw Exception("Cannot create dictionary with empty attributes list", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); if (query.dictionary->layout == nullptr) - throw Exception("Cannot create dictionary with empty layout.", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + throw Exception("Cannot create dictionary with empty layout", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); if (query.dictionary->lifetime == nullptr) - throw Exception("Dictionary AST missing lifetime section", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + throw Exception("Cannot create dictionary with empty lifetime", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); if (query.dictionary->primary_key == nullptr) - throw Exception("Dictionary AST missing primary key", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + throw Exception("Cannot create dictionary without primary key", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); if (query.dictionary->source == nullptr) - throw Exception("Dictionary AST missing source", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); + throw Exception("Cannot create dictionary with empty source", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); /// Range can be empty } +void checkPrimaryKey(const std::unordered_set & all_attrs, const Names & key_attrs) +{ + for (const auto & key_attr : key_attrs) + if (all_attrs.count(key_attr) == 0) + throw Exception("Unknown key attribute '" + key_attr + "'", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION); +} + } @@ -409,14 +418,15 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer AutoPtr structure_element(xml_document->createElement("structure")); current_dictionary->appendChild(structure_element); - Names pk_columns = getPrimaryKeyColumns(query.dictionary->primary_key); + Names pk_attrs = getPrimaryKeyColumns(query.dictionary->primary_key); auto dictionary_layout = query.dictionary->layout; bool complex = 
DictionaryFactory::instance().isComplex(dictionary_layout->layout_type); - buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_columns); + auto all_attr_names = buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_attrs); + checkPrimaryKey(all_attr_names, pk_attrs); - buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_columns, query.dictionary_attributes_list); + buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list); buildLayoutConfiguration(xml_document, current_dictionary, dictionary_layout); buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference new file mode 100644 index 00000000000..abc3218ce6c --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference @@ -0,0 +1,7 @@ +2 +2 +2 +2 +2 +2 +2 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh new file mode 100755 index 00000000000..782e6955b6e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS dict1" + +# Simple layout, but with two keys +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY key1, key2 + LAYOUT(HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c 'Primary key for simple dictionary must contain exactly one element' + + +# Simple layout, but with non existing key +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY non_existing_column + LAYOUT(HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c "Unknown key attribute 'non_existing_column'" + +# Complex layout, with non existing key +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY non_existing_column, key1 + LAYOUT(COMPLEX_KEY_HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c "Unknown key attribute 'non_existing_column'" + +# No layout +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY key2, key1 + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c "Cannot create dictionary with empty layout" + +# No PK +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + LAYOUT(COMPLEX_KEY_HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c "Cannot create 
dictionary without primary key" + +# No lifetime +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY key2, key1 + LAYOUT(COMPLEX_KEY_HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) +" 2>&1 | grep -c "Cannot create dictionary with empty lifetime" + +# No source +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY non_existing_column, key1 + LAYOUT(COMPLEX_KEY_HASHED()) + LIFETIME(MIN 1 MAX 10) +" 2>&1 | grep -c "Cannot create dictionary with empty source" + + +# Complex layout, but with one key +$CLICKHOUSE_CLIENT -q " + CREATE DICTIONARY dict1 + ( + key1 UInt64, + key2 UInt64, + value String + ) + PRIMARY KEY key1 + LAYOUT(COMPLEX_KEY_HASHED()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) + LIFETIME(MIN 1 MAX 10) +" || exit 1 + + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS dict1" diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 9e7cf24d9c5..a8cff8546e3 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -66,6 +66,8 @@ SELECT '==DROP DICTIONARY'; DROP DICTIONARY IF EXISTS ordinary_db.dict1; +SYSTEM RELOAD DICTIONARY 'ordinary_db.dict1'; -- due to lazy_load at can persist for some time + SHOW DICTIONARIES FROM ordinary_db LIKE 'dict1'; EXISTS DICTIONARY ordinary_db.dict1; diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 1b9741bd720..19f2d1244a0 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql 
@@ -41,6 +41,8 @@ SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', t DETACH DICTIONARY database_for_dict.dict1; +SYSTEM RELOAD DICTIONARY 'database_for_dict.dict1'; + SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} ATTACH DICTIONARY database_for_dict.dict1; @@ -49,6 +51,8 @@ SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); DROP DICTIONARY database_for_dict.dict1; +SYSTEM RELOAD DICTIONARY 'database_for_dict.dict1'; + SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} CREATE DICTIONARY database_for_dict.dict1 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference new file mode 100644 index 00000000000..0c5d37b815f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.reference @@ -0,0 +1,11 @@ +0.33 +0.42 +0.46 +0 +17501 +NP +Moscow +[3,2,1,10000] +1 +1 +0 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql new file mode 100644 index 00000000000..196e78480a1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_special.sql @@ -0,0 +1,94 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +CREATE TABLE database_for_dict.table_for_dict +( + CountryID UInt64, + StartDate Date, + EndDate Date, + Tax Float64 +) +ENGINE = MergeTree() +ORDER BY CountryID; + +INSERT INTO database_for_dict.table_for_dict VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO database_for_dict.table_for_dict VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO database_for_dict.table_for_dict VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); + +CREATE 
DICTIONARY database_for_dict.dict1 +( + CountryID UInt64, + StartDate Date, + EndDate Date, + Tax Float64 +) +PRIMARY KEY CountryID +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(RANGE_HASHED()) +RANGE(MIN StartDate MAX EndDate); + +SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-15')); +SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('2019-05-29')); +SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-29')); +SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-31')); + +CREATE TABLE database_for_dict.table_ip_trie +( + prefix String, + asn UInt32, + cca2 String +) +engine = TinyLog; + +INSERT INTO database_for_dict.table_ip_trie VALUES ('202.79.32.0/20', 17501, 'NP'), ('2620:0:870::/48', 3856, 'US'), ('2a02:6b8:1::/48', 13238, 'RU'), ('2001:db8::/32', 65536, 'ZZ'); + + +CREATE DICTIONARY database_for_dict.dict_ip_trie +( + prefix String, + asn UInt32, + cca2 String +) +PRIMARY KEY prefix +SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie')) +LAYOUT(IP_TRIE()) +LIFETIME(MIN 10 MAX 100); + +SELECT dictGetUInt32('database_for_dict.dict_ip_trie', 'asn', tuple(IPv4StringToNum('202.79.32.0'))); +SELECT dictGetString('database_for_dict.dict_ip_trie', 'cca2', tuple(IPv4StringToNum('202.79.32.0'))); + +CREATE TABLE database_for_dict.table_with_hierarchy +( + RegionID UInt64, + ParentRegionID UInt64, + RegionName String +) +ENGINE = MergeTree() +ORDER BY RegionID; + +INSERT INTO database_for_dict.table_with_hierarchy VALUES (3, 2, 'Hamovniki'), (2, 1, 'Moscow'), (1, 10000, 'Russia') (7, 10000, 'Ulan-Ude'); + + +CREATE DICTIONARY database_for_dict.dictionary_with_hierarchy +( + RegionID UInt64, + ParentRegionID UInt64 HIERARCHICAL, + RegionName String +) +PRIMARY KEY RegionID +SOURCE(CLICKHOUSE(host 
'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_with_hierarchy')) +LAYOUT(HASHED()) +LIFETIME(MIN 1 MAX 1000); + +SELECT dictGetString('database_for_dict.dictionary_with_hierarchy', 'RegionName', toUInt64(2)); +SELECT dictGetHierarchy('database_for_dict.dictionary_with_hierarchy', toUInt64(3)); +SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(3), toUInt64(2)); +SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(7), toUInt64(10000)); +SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(1), toUInt64(5)); + +DROP DATABASE IF EXISTS database_for_dict; + From 82d0dfa62961ebdea4fffec699cde9dae9d9fd27 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 22 Oct 2019 11:54:16 +0800 Subject: [PATCH 067/122] Better add_globs --- cmake/dbms_glob_sources.cmake | 19 +++++++++++++------ dbms/CMakeLists.txt | 6 ++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 5ea9fe88ca3..9c8c53c63b6 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -1,13 +1,20 @@ -macro(add_glob cur_list) - file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) - list(APPEND ${cur_list} ${__tmp}) -endmacro() +if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.12") + macro(add_glob cur_list) + file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS ${ARGN}) + list(APPEND ${cur_list} ${__tmp}) + endmacro() +else () + macro(add_glob cur_list) + file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) + list(APPEND ${cur_list} ${__tmp}) + endmacro() +endif () macro(add_headers_and_sources prefix common_path) - add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) endmacro() macro(add_headers_only 
prefix common_path) - add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) endmacro() diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index ec9ffc6e3dd..78307c15123 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -153,12 +153,10 @@ add_subdirectory(src/Common/Config) set (all_modules) macro(add_object_library name common_path) if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - add_glob(dbms_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(dbms_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) + add_headers_and_sources(dbms ${common_path}) else () list (APPEND all_modules ${name}) - add_glob(${name}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${name}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) + add_headers_and_sources(${name} ${common_path}) add_library(${name} SHARED ${${name}_sources} ${${name}_headers}) target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all) endif () From 88834bd75a6d280813e4890b4d0809329e320e00 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 22 Oct 2019 15:10:59 +0800 Subject: [PATCH 068/122] Resolve DUMP overload resolution ambiguity. --- dbms/src/Core/iostream_debug_helpers.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Core/iostream_debug_helpers.h b/dbms/src/Core/iostream_debug_helpers.h index 92157a9436d..7d109b4604b 100644 --- a/dbms/src/Core/iostream_debug_helpers.h +++ b/dbms/src/Core/iostream_debug_helpers.h @@ -6,11 +6,18 @@ namespace DB { + +// Used to disable implicit casting for certain overloaded types such as Field, which leads to +// overload resolution ambiguity. 
+template struct Dumpable; +template +std::ostream & operator<<(std::ostream & stream, const typename Dumpable::Type & what); + class IBlockInputStream; std::ostream & operator<<(std::ostream & stream, const IBlockInputStream & what); class Field; -std::ostream & operator<<(std::ostream & stream, const Field & what); +template <> struct Dumpable { using Type = Field; }; struct NameAndTypePair; std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what); From 395e63ddf13e9f2a564563e078c5d56664d229e1 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 22 Oct 2019 12:24:30 +0300 Subject: [PATCH 069/122] Fixed erroneous warning `max_data_part_size is too low` #7414 --- dbms/src/Common/DiskSpaceMonitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/DiskSpaceMonitor.cpp b/dbms/src/Common/DiskSpaceMonitor.cpp index 00a146a809e..a09daf6f677 100644 --- a/dbms/src/Common/DiskSpaceMonitor.cpp +++ b/dbms/src/Common/DiskSpaceMonitor.cpp @@ -292,7 +292,7 @@ Volume::Volume( formatReadableSizeWithBinarySuffix(max_data_part_size) << ")"); } constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u; - if (max_data_part_size < MIN_PART_SIZE) + if (max_data_part_size != 0 && max_data_part_size < MIN_PART_SIZE) LOG_WARNING(logger, "Volume " << backQuote(name) << " max_data_part_size is too low (" << formatReadableSizeWithBinarySuffix(max_data_part_size) << " < " << formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")"); From 7ba4e36e73c135ba8bcf65eb14cd9959621e904f Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 22 Oct 2019 13:31:28 +0300 Subject: [PATCH 070/122] Don't push to MVs when inserting into Kafka table (#7265) * Do not insert values into MV when inserting directly to Kafka * Add method noPushingToViews() to IStorage interface To separate InterpreterInsertQuery and StorageKafka --- .../Interpreters/InterpreterInsertQuery.cpp | 39 +++++++++++-------- 
.../src/Interpreters/InterpreterInsertQuery.h | 8 +++- dbms/src/Parsers/ASTInsertQuery.h | 3 -- dbms/src/Storages/IStorage.h | 3 ++ dbms/src/Storages/Kafka/Buffer_fwd.h | 14 +++++++ .../Storages/Kafka/KafkaBlockOutputStream.cpp | 1 + .../Kafka/ReadBufferFromKafkaConsumer.cpp | 2 + .../Kafka/ReadBufferFromKafkaConsumer.h | 2 - dbms/src/Storages/Kafka/StorageKafka.cpp | 5 ++- dbms/src/Storages/Kafka/StorageKafka.h | 11 +++++- .../Kafka/WriteBufferToKafkaProducer.h | 3 -- .../integration/test_storage_kafka/test.py | 17 ++++---- 12 files changed, 68 insertions(+), 40 deletions(-) create mode 100644 dbms/src/Storages/Kafka/Buffer_fwd.h diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 18652083f06..61c153b13be 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,29 +1,25 @@ -#include -#include - -#include -#include +#include #include #include #include -#include #include #include +#include #include +#include #include #include -#include #include - +#include +#include +#include +#include #include #include - -#include -#include - +#include #include -#include +#include namespace DB @@ -38,8 +34,12 @@ namespace ErrorCodes InterpreterInsertQuery::InterpreterInsertQuery( - const ASTPtr & query_ptr_, const Context & context_, bool allow_materialized_, bool no_squash_) - : query_ptr(query_ptr_), context(context_), allow_materialized(allow_materialized_), no_squash(no_squash_) + const ASTPtr & query_ptr_, const Context & context_, bool allow_materialized_, bool no_squash_, bool no_destination_) + : query_ptr(query_ptr_) + , context(context_) + , allow_materialized(allow_materialized_) + , no_squash(no_squash_) + , no_destination(no_destination_) { checkStackSize(); } @@ -65,7 +65,7 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const /// If the query does not include information about columns if (!query.columns) { 
- if (query.no_destination) + if (no_destination) return table->getSampleBlockWithVirtuals(); else return table_sample_non_materialized; @@ -102,7 +102,12 @@ BlockIO InterpreterInsertQuery::execute() /// We create a pipeline of several streams, into which we will write data. BlockOutputStreamPtr out; - out = std::make_shared(query.database, query.table, table, context, query_ptr, query.no_destination); + /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. + /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. + if (table->noPushingToViews() && !no_destination) + out = table->write(query_ptr, context); + else + out = std::make_shared(query.database, query.table, table, context, query_ptr, no_destination); /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 37cb3110984..b9deb06fb32 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -15,7 +15,12 @@ namespace DB class InterpreterInsertQuery : public IInterpreter { public: - InterpreterInsertQuery(const ASTPtr & query_ptr_, const Context & context_, bool allow_materialized_ = false, bool no_squash_ = false); + InterpreterInsertQuery( + const ASTPtr & query_ptr_, + const Context & context_, + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false); /** Prepare a request for execution. 
Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -35,6 +40,7 @@ private: const Context & context; const bool allow_materialized; const bool no_squash; + const bool no_destination; }; diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index 3f91c622e7b..7eab80ca54d 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -20,9 +20,6 @@ public: ASTPtr table_function; ASTPtr settings_ast; - // Set to true if the data should only be inserted into attached views - bool no_destination = false; - /// Data to insert const char * data = nullptr; const char * end = nullptr; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 8d30f31bde3..b4d6b2c3085 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -102,6 +102,9 @@ public: /// Returns true if the storage supports settings. virtual bool supportsSettings() const { return false; } + /// Returns true if the blocks shouldn't be pushed to associated views on insert. + virtual bool noPushingToViews() const { return false; } + /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. 
using ColumnSizeByName = std::unordered_map; diff --git a/dbms/src/Storages/Kafka/Buffer_fwd.h b/dbms/src/Storages/Kafka/Buffer_fwd.h new file mode 100644 index 00000000000..89e72454aa5 --- /dev/null +++ b/dbms/src/Storages/Kafka/Buffer_fwd.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferFromKafkaConsumer; +class WriteBufferToKafkaProducer; + +using ConsumerBufferPtr = std::shared_ptr; +using ProducerBufferPtr = std::shared_ptr; + +} diff --git a/dbms/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockOutputStream.cpp index 574b3bd9fd1..b887d573452 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockOutputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockOutputStream.cpp @@ -1,6 +1,7 @@ #include "KafkaBlockOutputStream.h" #include +#include namespace DB { diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 9a3bd73a6b2..44b8a119240 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -2,6 +2,8 @@ #include +#include + namespace DB { diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 8c2fcd3c7bb..d1ea961cef2 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -61,6 +61,4 @@ private: bool nextImpl() override; }; -using ConsumerBufferPtr = std::shared_ptr; - } diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 99e86bbdd69..afc518b415a 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -366,7 +367,6 @@ bool StorageKafka::streamToViews() auto insert = std::make_shared(); insert->database = database_name; 
insert->table = table_name; - insert->no_destination = true; // Only insert into dependent views and expect that input blocks contain virtual columns const Settings & settings = global_context.getSettingsRef(); size_t block_size = max_block_size; @@ -374,7 +374,8 @@ bool StorageKafka::streamToViews() block_size = settings.max_block_size; // Create a stream for each consumer and join them in a union stream - InterpreterInsertQuery interpreter(insert, global_context, false, true); + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, global_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index e8799983705..492d1d65411 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -2,8 +2,7 @@ #include #include -#include -#include +#include #include #include @@ -11,6 +10,12 @@ #include #include +namespace cppkafka +{ + +class Configuration; + +} namespace DB { @@ -25,7 +30,9 @@ public: std::string getName() const override { return "Kafka"; } std::string getTableName() const override { return table_name; } std::string getDatabaseName() const override { return database_name; } + bool supportsSettings() const override { return true; } + bool noPushingToViews() const override { return true; } void startup() override; void shutdown() override; diff --git a/dbms/src/Storages/Kafka/WriteBufferToKafkaProducer.h b/dbms/src/Storages/Kafka/WriteBufferToKafkaProducer.h index d11766d7f35..b6751551ec7 100644 --- a/dbms/src/Storages/Kafka/WriteBufferToKafkaProducer.h +++ b/dbms/src/Storages/Kafka/WriteBufferToKafkaProducer.h @@ -9,9 +9,6 @@ namespace DB { -class WriteBufferToKafkaProducer; - -using ProducerBufferPtr = std::shared_ptr; using ProducerPtr = std::shared_ptr; class 
WriteBufferToKafkaProducer : public WriteBuffer diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index c629ac9f22e..a2e1511537a 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -560,6 +560,8 @@ def test_kafka_insert(kafka_cluster): @pytest.mark.timeout(180) def test_kafka_produce_consume(kafka_cluster): instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -567,6 +569,11 @@ def test_kafka_produce_consume(kafka_cluster): kafka_group_name = 'insert2', kafka_format = 'TSV', kafka_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; ''') messages_num = 10000 @@ -594,16 +601,6 @@ def test_kafka_produce_consume(kafka_cluster): time.sleep(random.uniform(0, 1)) thread.start() - instance.query(''' - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - ''') - while True: result = instance.query('SELECT count() FROM test.view') time.sleep(1) From dfd9f2665236b2e2b3c220aa5f46fa3bd7f4f289 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 22 Oct 2019 13:43:52 +0300 Subject: [PATCH 071/122] Port the latest dispatch() improvements. 
--- dbms/src/Common/HashTable/StringHashTable.h | 79 +++++-------------- .../HashTable/TwoLevelStringHashTable.h | 53 +++++++------ 2 files changed, 47 insertions(+), 85 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index 7ea17acb316..b23edb396ae 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -3,36 +3,6 @@ #include #include -#define CASE_1_8 \ - case 1: \ - case 2: \ - case 3: \ - case 4: \ - case 5: \ - case 6: \ - case 7: \ - case 8 - -#define CASE_9_16 \ - case 9: \ - case 10: \ - case 11: \ - case 12: \ - case 13: \ - case 14: \ - case 15: \ - case 16 - -#define CASE_17_24 \ - case 17: \ - case 18: \ - case 19: \ - case 20: \ - case 21: \ - case 22: \ - case 23: \ - case 24 - struct StringKey0 { }; @@ -46,17 +16,6 @@ struct StringKey24 UInt64 c; bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } - bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); } - bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; } - bool operator!=(const UInt64 rhs) const { return !operator==(rhs); } - - StringKey24 & operator=(const UInt64 rhs) - { - a = rhs; - b = 0; - c = 0; - return *this; - } }; inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) @@ -71,10 +30,6 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) { return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; } -inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s) -{ - return s; -} struct StringHashTableHash { @@ -242,18 +197,23 @@ public: // Dispatch is written in a way that maximizes the performance: // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table - // 3. Combine hash computation along with key loading - // 4. 
Funcs are named callables that can be force_inlined - // NOTE: It relies on Little Endianness and SSE4.2 + // 3. Funcs are named callables that can be force_inlined + // NOTE: It relies on Little Endianness template decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) { - static constexpr StringKey0 key0{}; const StringRef & x = keyHolderGetKey(key_holder); - size_t sz = x.size; + const size_t sz = x.size; + if (sz == 0) + { + static constexpr StringKey0 key0{}; + keyHolderDiscardKey(key_holder); + return func(m0, key0, 0); + } + const char * p = x.data; // pending bits that needs to be shifted out - char s = (-sz & 7) * 8; + const char s = (-sz & 7) * 8; union { StringKey8 k8; @@ -262,12 +222,10 @@ public: UInt64 n[3]; }; StringHashTableHash hash; - switch (sz) + switch ((sz - 1) >> 3) { - case 0: - keyHolderDiscardKey(key_holder); - return func(m0, key0, 0); - CASE_1_8 : { + case 0: // 1..8 bytes + { // first half page if ((reinterpret_cast(p) & 2048) == 0) { @@ -283,7 +241,8 @@ public: keyHolderDiscardKey(key_holder); return func(m1, k8, hash(k8)); } - CASE_9_16 : { + case 1: // 9..16 bytes + { memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); @@ -291,7 +250,8 @@ public: keyHolderDiscardKey(key_holder); return func(m2, k16, hash(k16)); } - CASE_17_24 : { + case 2: // 17..24 bytes + { memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); @@ -299,7 +259,8 @@ public: keyHolderDiscardKey(key_holder); return func(m3, k24, hash(k24)); } - default: { + default: // >= 25 bytes + { return func(ms, std::forward(key_holder), hash(x)); } } diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index ed1e1b01857..2aeb266c66b 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -69,23 +69,23 @@ public: } } - // Dispatch is written in a way that maximizes the 
performance: - // 1. Always memcpy 8 times bytes - // 2. Use switch case extension to generate fast dispatching table - // 3. Combine hash computation along with bucket computation and key loading - // 4. Funcs are named callables that can be force_inlined - // NOTE: It relies on Little Endianness and SSE4.2 + // This function is mostly the same as StringHashTable::dispatch, but with + // added bucket computation. See the comments there. template decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) { - static constexpr StringKey0 key0{}; const StringRef & x = keyHolderGetKey(key_holder); - size_t sz = x.size; + const size_t sz = x.size; + if (sz == 0) + { + static constexpr StringKey0 key0{}; + keyHolderDiscardKey(key_holder); + return func(impls[0].m0, key0, 0); + } + const char * p = x.data; // pending bits that needs to be shifted out - char s = (-sz & 7) * 8; - size_t res = -1ULL; - size_t buck; + const char s = (-sz & 7) * 8; union { StringKey8 k8; @@ -94,12 +94,10 @@ public: UInt64 n[3]; }; StringHashTableHash hash; - switch (sz) + switch ((sz - 1) >> 3) { case 0: - keyHolderDiscardKey(key_holder); - return func(impls[0].m0, key0, 0); - CASE_1_8 : { + { // first half page if ((reinterpret_cast(p) & 2048) == 0) { @@ -112,34 +110,37 @@ public: memcpy(&n[0], lp, 8); n[0] >>= s; } - res = hash(k8); - buck = getBucketFromHash(res); + auto res = hash(k8); + auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m1, k8, res); } - CASE_9_16 : { + case 1: + { memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); n[1] >>= s; - res = hash(k16); - buck = getBucketFromHash(res); + auto res = hash(k16); + auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m2, k16, res); } - CASE_17_24 : { + case 2: + { memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); n[2] >>= s; - res = hash(k24); - buck = 
getBucketFromHash(res); + auto res = hash(k24); + auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m3, k24, res); } - default: { - res = hash(x); - buck = getBucketFromHash(res); + default: + { + auto res = hash(x); + auto buck = getBucketFromHash(res); return func(impls[buck].ms, std::forward(key_holder), res); } } From 4c5d150d7a86e0a1dfaed142f7b0deca2d6c49c4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 13:47:43 +0300 Subject: [PATCH 072/122] Review fixes --- dbms/src/Common/ErrorCodes.cpp | 10 ++-- dbms/src/Databases/DatabaseFactory.cpp | 20 +++---- dbms/src/Databases/DatabaseOnDisk.cpp | 59 +++++++++---------- dbms/src/Databases/DatabaseOrdinary.cpp | 24 ++------ .../getDictionaryConfigurationFromAST.cpp | 8 ++- dbms/src/Interpreters/Context.cpp | 2 +- 6 files changed, 51 insertions(+), 72 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index b8a0737c4b5..7108683f19c 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -155,7 +155,7 @@ namespace ErrorCodes extern const int NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE = 147; extern const int NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION = 148; extern const int NOT_FOUND_RHS_ELEMENT_FOR_CONDITION = 149; - extern const int NO_ATTRIBUTES_LISTED = 150; + extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 150; extern const int INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE = 151; extern const int UNKNOWN_DIRECTION_OF_SORTING = 152; extern const int ILLEGAL_DIVISION = 153; @@ -361,7 +361,7 @@ namespace ErrorCodes extern const int PART_IS_TEMPORARILY_LOCKED = 384; extern const int MULTIPLE_STREAMS_REQUIRED = 385; extern const int NO_COMMON_TYPE = 386; - extern const int EXTERNAL_LOADABLE_ALREADY_EXISTS = 387; + extern const int DICTIONARY_ALREADY_EXISTS = 387; extern const int CANNOT_ASSIGN_OPTIMIZE = 388; extern const int INSERT_WAS_DEDUPLICATED = 389; extern const int 
CANNOT_GET_CREATE_TABLE_QUERY = 390; @@ -460,10 +460,8 @@ namespace ErrorCodes extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 485; - extern const int DICTIONARY_ALREADY_EXISTS = 486; - extern const int UNKNOWN_DICTIONARY = 487; - extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 488; - extern const int INCORRECT_DICTIONARY_DEFINITION = 489; + extern const int UNKNOWN_DICTIONARY = 486; + extern const int INCORRECT_DICTIONARY_DEFINITION = 487; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Databases/DatabaseFactory.cpp b/dbms/src/Databases/DatabaseFactory.cpp index 5b6456fc8b8..a968e9140c6 100644 --- a/dbms/src/Databases/DatabaseFactory.cpp +++ b/dbms/src/Databases/DatabaseFactory.cpp @@ -54,13 +54,12 @@ DatabasePtr DatabaseFactory::get( { const ASTFunction * engine = engine_define->engine; - std::vector arguments; - if (engine->arguments) - arguments = engine->arguments->children; + if (!engine->arguments || engine->arguments->children.size() != 4) + throw Exception( + "MySQL Database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.", + ErrorCodes::BAD_ARGUMENTS); - if (arguments.size() != 4) - throw Exception("MySQL Database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.", - ErrorCodes::BAD_ARGUMENTS); + const auto & arguments = engine->arguments->children; const auto & mysql_host_name = arguments[0]->as()->value.safeGet(); const auto & mysql_database_name = arguments[1]->as()->value.safeGet(); @@ -78,13 +77,10 @@ DatabasePtr DatabaseFactory::get( { const ASTFunction * engine = engine_define->engine; - std::vector arguments; - if (engine->arguments) - arguments = engine->arguments->children; + if (!engine->arguments || engine->arguments->children.size() != 1) + throw Exception("Lazy database require cache_expiration_time_seconds argument", 
ErrorCodes::BAD_ARGUMENTS); - if (arguments.size() != 1) - throw Exception("Lazy database require cache_expiration_time_seconds argument", - ErrorCodes::BAD_ARGUMENTS); + const auto & arguments = engine->arguments->children; const auto cache_expiration_time_seconds = arguments[0]->as()->value.safeGet(); return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 76e7511de6c..6acee067db6 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -165,28 +165,35 @@ std::pair createTableFromAST( String getObjectDefinitionFromCreateQuery(const ASTPtr & query) { ASTPtr query_clone = query->clone(); - auto & create = query_clone->as(); + auto * create = query_clone->as(); - if (!create.is_dictionary) - create.attach = true; + if (!create) + { + std::ostringstream query_stream; + formatAST(*create, query_stream, true); + throw Exception("Query '" + query_stream.str() + "' is not CREATE query", ErrorCodes::LOGICAL_ERROR); + } + + if (!create->is_dictionary) + create->attach = true; /// We remove everything that is not needed for ATTACH from the query. 
- create.database.clear(); - create.as_database.clear(); - create.as_table.clear(); - create.if_not_exists = false; - create.is_populate = false; - create.replace_view = false; + create->database.clear(); + create->as_database.clear(); + create->as_table.clear(); + create->if_not_exists = false; + create->is_populate = false; + create->replace_view = false; /// For views it is necessary to save the SELECT query itself, for the rest - on the contrary - if (!create.is_view && !create.is_materialized_view && !create.is_live_view) - create.select = nullptr; + if (!create->is_view && !create->is_materialized_view && !create->is_live_view) + create->select = nullptr; - create.format = nullptr; - create.out_file = nullptr; + create->format = nullptr; + create->out_file = nullptr; std::ostringstream statement_stream; - formatAST(create, statement_stream, false); + formatAST(*create, statement_stream, false); statement_stream << '\n'; return statement_stream.str(); } @@ -260,18 +267,14 @@ void DatabaseOnDisk::createDictionary( { const auto & settings = context.getSettingsRef(); - /// Create a file with metadata if necessary - if the query is not ATTACH. - /// Write the query of `ATTACH table` to it. - /** The code is based on the assumption that all threads share the same order of operations * - creating the .sql.tmp file; - * - adding a table to `tables`; + * - adding a dictionary to `dictionaries`; * - rename .sql.tmp to .sql. */ - /// A race condition would be possible if a table with the same name is simultaneously created using CREATE and using ATTACH. + /// A race condition would be possible if a dictionary with the same name is simultaneously created using CREATE and using ATTACH. /// But there is protection from it - see using DDLGuard in InterpreterCreateQuery. - if (database.isDictionaryExist(context, dictionary_name)) throw Exception("Dictionary " + backQuote(database.getDatabaseName()) + "." 
+ backQuote(dictionary_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); @@ -297,7 +300,7 @@ void DatabaseOnDisk::createDictionary( try { - /// Do not load it now + /// Do not load it now because we want more strict loading database.attachDictionary(dictionary_name, context, false); /// Load dictionary bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); @@ -305,7 +308,7 @@ void DatabaseOnDisk::createDictionary( context.getExternalDictionariesLoader().addDictionaryWithConfig( dict_name, database.getDatabaseName(), query->as(), !lazy_load); - /// If it was ATTACH query and file with table metadata already exist + /// If it was ATTACH query and file with dictionary metadata already exist /// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one. Poco::File(dictionary_metadata_tmp_path).renameTo(dictionary_metadata_path); @@ -354,7 +357,7 @@ void DatabaseOnDisk::removeDictionary( IDatabase & database, const Context & context, const String & dictionary_name, - Poco::Logger * log) + Poco::Logger * /*log*/) { database.detachDictionary(dictionary_name, context); @@ -366,15 +369,7 @@ void DatabaseOnDisk::removeDictionary( } catch (...) { - try - { - Poco::File(dictionary_metadata_path + ".tmp_drop").remove(); - return; - } - catch (...) 
- { - LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__)); - } + /// If it's not possible for some reason database.attachDictionary(dictionary_name, context); throw; } diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 9ce5483aa55..0a5a58c1b8f 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -55,18 +55,6 @@ namespace { -String createDictionaryFromAST( - ASTCreateQuery ast_create_query, - const String & database_name) -{ - ast_create_query.database = database_name; - - if (!ast_create_query.dictionary_attributes_list) - throw Exception("Missing definition of dictionary attributes.", ErrorCodes::EMPTY_LIST_OF_ATTRIBUTES_PASSED); - - return ast_create_query.table; -} - void loadObject( Context & context, const ASTCreateQuery & query, @@ -78,7 +66,7 @@ try { if (query.is_dictionary) { - String dictionary_name = createDictionaryFromAST(query, database_name); + String dictionary_name = query.table; database.attachDictionary(dictionary_name, context, false); } else @@ -130,7 +118,7 @@ void DatabaseOrdinary::loadStoredObjects( * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, * which does not correspond to order tables creation and does not correspond to order of their location on disk. 
*/ - using FileNames = std::map; + using FileNames = std::map; FileNames file_names; size_t total_dictionaries = 0; @@ -142,9 +130,9 @@ void DatabaseOrdinary::loadStoredObjects( auto ast = parseCreateQueryFromMetadataFile(full_path, log); if (ast) { - auto create_query = ast->as(); - file_names[file_name] = create_query; - total_dictionaries += create_query.is_dictionary; + auto * create_query = ast->as(); + file_names[file_name] = ast; + total_dictionaries += create_query->is_dictionary; } } catch (const Exception & e) @@ -179,7 +167,7 @@ void DatabaseOrdinary::loadStoredObjects( for (const auto & name_with_query : file_names) { - pool.scheduleOrThrowOnError([&]() { loadOneObject(name_with_query.second); }); + pool.scheduleOrThrowOnError([&]() { loadOneObject(name_with_query.second->as()); }); } pool.wait(); diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 00f86d9de2b..67d4e99cac6 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -28,8 +28,11 @@ namespace ErrorCodes namespace { -String unescapeString(const String & string) +/// Get value from field and convert it to string. +/// Also remove quotes from strings. 
+String getUnescapedFieldString(const Field & field) { + String string = applyVisitor(FieldVisitorToString(), field); if (!string.empty() && string.front() == '\'' && string.back() == '\'') return string.substr(1, string.size() - 2); return string; @@ -324,8 +327,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments( } else if (auto literal = pair->second->as(); literal) { - String str_literal = applyVisitor(FieldVisitorToString(), literal->value); - AutoPtr value(doc->createTextNode(unescapeString(str_literal))); + AutoPtr value(doc->createTextNode(getUnescapedFieldString(literal->value))); current_xml_element->appendChild(value); } else if (auto list = pair->second->as(); list) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 3658b476c8f..d46839c2d67 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -192,7 +192,7 @@ struct ContextShared bool shutdown_called = false; /// Do not allow simultaneous execution of DDL requests on the same table. - /// database -> table -> (mutex, counter), counter: how many threads are running a query on the table at the same time + /// database -> object -> (mutex, counter), counter: how many threads are running a query on the table at the same time /// For the duration of the operation, an element is placed here, and an object is returned, /// which deletes the element in the destructor when counter becomes zero. /// In case the element already exists, waits, when query will be executed in other thread. See class DDLGuard below. 
From 99fac92b1d2d9e5a4bb9c8d5de23e462d9fea842 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 13:49:11 +0300 Subject: [PATCH 073/122] Fix comment --- dbms/src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Databases/DatabaseOnDisk.cpp b/dbms/src/Databases/DatabaseOnDisk.cpp index 6acee067db6..cb5921d74b9 100644 --- a/dbms/src/Databases/DatabaseOnDisk.cpp +++ b/dbms/src/Databases/DatabaseOnDisk.cpp @@ -369,7 +369,7 @@ void DatabaseOnDisk::removeDictionary( } catch (...) { - /// If it's not possible for some reason + /// If remove was not possible for some reason database.attachDictionary(dictionary_name, context); throw; } From d3461f9d4634a09a31b0ffee10df5ae9142d6a5a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 14:03:16 +0300 Subject: [PATCH 074/122] TODO comments --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 68701e22e8b..573c655035d 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -631,6 +631,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Table can be created before or it can be created concurrently in another thread, while we were waiting in DDLGuard. 
if (database->isTableExist(context, table_name)) { + /// TODO Check structure of table if (create.if_not_exists) return {}; else if (create.replace_view) @@ -716,6 +717,7 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create) if (database->isDictionaryExist(context, dictionary_name)) { + /// TODO Check structure of dictionary if (create.if_not_exists) return {}; else From dc7c4c1b9a775eb5feefe7f83593204e07aebc13 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Tue, 22 Oct 2019 15:30:00 +0300 Subject: [PATCH 075/122] add tests for aggregation over empty decimal table --- .../00700_decimal_empty_aggregates.reference | 58 ++++++++++++ .../00700_decimal_empty_aggregates.sql | 90 +++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference new file mode 100644 index 00000000000..adc33ffce18 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference @@ -0,0 +1,58 @@ +0 0 0 +[0.0000,0.0000] [0.00000000,0.00000000] [0.00000000,0.00000000] +0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0) +0 0 0 +0 0 0 +0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 +0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 +0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 +0.0000 0.0000 0.00000000 0.00000000 0.00000000 
0.00000000 +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +[0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +[0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 Decimal(38, 8) +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +0.0000 0.00000000 0.00000000 +[0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +inf inf inf Float64 Float64 Float64 +nan nan nan +nan nan nan Float64 Float64 Float64 +nan nan nan +inf inf inf Float64 Float64 Float64 +nan nan nan +nan nan nan Float64 Float64 Float64 +nan 
nan nan diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql new file mode 100644 index 00000000000..0b0a37c5229 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql @@ -0,0 +1,90 @@ +DROP TABLE IF EXISTS decimal; + +CREATE TABLE decimal +( + a Decimal32(4), + b Decimal64(8), + c Decimal128(8) +) ENGINE = Memory; + +SELECT count(a), count(b), count(c) FROM decimal; +SELECT [min(a), max(a)], [min(b), max(b)], [min(c), max(c)] FROM decimal; + +SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOverflow(c) FROM decimal; +SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOverflow(c) FROM decimal WHERE a > 0; +SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOverflow(c) FROM decimal WHERE a < 0; +SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal; +SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal; + +SELECT avg(a), avg(b), avg(c) FROM decimal; +SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a > 0; +SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a < 0; + +SELECT (uniq(a), uniq(b), uniq(c)), + (uniqCombined(a), uniqCombined(b), uniqCombined(c)), + (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), + (uniqExact(a), uniqExact(b), uniqExact(c)), + (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) +FROM (SELECT * FROM decimal ORDER BY a); + +SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5; +SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 10; + +SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), argMin(c, b) FROM decimal; +SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), argMin(c, b) FROM 
decimal WHERE a > 0; +SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal; +SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal WHERE a < 0; + +SELECT median(a), median(b), median(c) as x, toTypeName(x) FROM decimal; +SELECT quantile(a), quantile(b), quantile(c) as x, toTypeName(x) FROM decimal WHERE a < 0; +SELECT quantile(0.0)(a), quantile(0.0)(b), quantile(0.0)(c) FROM decimal WHERE a >= 0; +SELECT quantile(0.2)(a), quantile(0.2)(b), quantile(0.2)(c) FROM decimal WHERE a >= 0; +SELECT quantile(0.4)(a), quantile(0.4)(b), quantile(0.4)(c) FROM decimal WHERE a >= 0; +SELECT quantile(0.6)(a), quantile(0.6)(b), quantile(0.6)(c) FROM decimal WHERE a >= 0; +SELECT quantile(0.8)(a), quantile(0.8)(b), quantile(0.8)(c) FROM decimal WHERE a >= 0; +SELECT quantile(1.0)(a), quantile(1.0)(b), quantile(1.0)(c) FROM decimal WHERE a >= 0; +SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a) FROM decimal; +SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b) FROM decimal; +SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c) FROM decimal; + +SELECT medianExact(a), medianExact(b), medianExact(c) as x, toTypeName(x) FROM decimal; +SELECT quantileExact(a), quantileExact(b), quantileExact(c) as x, toTypeName(x) FROM decimal WHERE a < 0; +SELECT quantileExact(0.0)(a), quantileExact(0.0)(b), quantileExact(0.0)(c) FROM decimal WHERE a >= 0; +SELECT quantileExact(0.2)(a), quantileExact(0.2)(b), quantileExact(0.2)(c) FROM decimal WHERE a >= 0; +SELECT quantileExact(0.4)(a), quantileExact(0.4)(b), quantileExact(0.4)(c) FROM decimal WHERE a >= 0; +SELECT quantileExact(0.6)(a), quantileExact(0.6)(b), quantileExact(0.6)(c) FROM decimal WHERE a >= 0; +SELECT quantileExact(0.8)(a), quantileExact(0.8)(b), quantileExact(0.8)(c) FROM decimal WHERE a >= 0; +SELECT quantileExact(1.0)(a), quantileExact(1.0)(b), quantileExact(1.0)(c) 
FROM decimal WHERE a >= 0; +SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a) FROM decimal; +SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b) FROM decimal; +SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c) FROM decimal; + +SELECT medianExactWeighted(a, 1), medianExactWeighted(b, 2), medianExactWeighted(c, 3) as x, toTypeName(x) FROM decimal; +SELECT quantileExactWeighted(a, 1), quantileExactWeighted(b, 2), quantileExactWeighted(c, 3) as x, toTypeName(x) FROM decimal WHERE a < 0; +SELECT quantileExactWeighted(0.0)(a, 1), quantileExactWeighted(0.0)(b, 2), quantileExactWeighted(0.0)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantileExactWeighted(0.2)(a, 1), quantileExactWeighted(0.2)(b, 2), quantileExactWeighted(0.2)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantileExactWeighted(0.4)(a, 1), quantileExactWeighted(0.4)(b, 2), quantileExactWeighted(0.4)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantileExactWeighted(0.6)(a, 1), quantileExactWeighted(0.6)(b, 2), quantileExactWeighted(0.6)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantileExactWeighted(0.8)(a, 1), quantileExactWeighted(0.8)(b, 2), quantileExactWeighted(0.8)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantileExactWeighted(1.0)(a, 1), quantileExactWeighted(1.0)(b, 2), quantileExactWeighted(1.0)(c, 3) FROM decimal WHERE a >= 0; +SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a, 1) FROM decimal; +SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b, 2) FROM decimal; +SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c, 3) FROM decimal; + +SELECT varPop(a) AS va, varPop(b) AS vb, varPop(c) AS vc, toTypeName(va), toTypeName(vb), toTypeName(vc) FROM decimal; +SELECT varPop(toFloat64(a)), varPop(toFloat64(b)), varPop(toFloat64(c)) FROM decimal; +SELECT varSamp(a) AS va, varSamp(b) AS vb, varSamp(c) AS vc, toTypeName(va), 
toTypeName(vb), toTypeName(vc) FROM decimal; +SELECT varSamp(toFloat64(a)), varSamp(toFloat64(b)), varSamp(toFloat64(c)) FROM decimal; + +SELECT stddevPop(a) AS da, stddevPop(b) AS db, stddevPop(c) AS dc, toTypeName(da), toTypeName(db), toTypeName(dc) FROM decimal; +SELECT stddevPop(toFloat64(a)), stddevPop(toFloat64(b)), stddevPop(toFloat64(c)) FROM decimal; +SELECT stddevSamp(a) AS da, stddevSamp(b) AS db, stddevSamp(c) AS dc, toTypeName(da), toTypeName(db), toTypeName(dc) FROM decimal; +SELECT stddevSamp(toFloat64(a)), stddevSamp(toFloat64(b)), stddevSamp(toFloat64(c)) FROM decimal; + +SELECT covarPop(a, a), covarPop(b, b), covarPop(c, c) FROM decimal; -- { serverError 43 } +SELECT covarSamp(a, a), covarSamp(b, b), covarSamp(c, c) FROM decimal; -- { serverError 43 } +SELECT corr(a, a), corr(b, b), corr(c, c) FROM decimal; -- { serverError 43 } +SELECT 1 LIMIT 0; + +DROP TABLE decimal; + From 6655eb6358453f70b5e603e1b84fd0f8fd15c23d Mon Sep 17 00:00:00 2001 From: akonyaev Date: Tue, 22 Oct 2019 15:34:36 +0300 Subject: [PATCH 076/122] fix --- dbms/src/AggregateFunctions/ReservoirSampler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index c0b35408b71..ee0b02c3cda 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -110,7 +110,7 @@ public: { if (samples.empty()) { if (DB::IsDecimalNumber) { - return static_cast(0); + return 0; } return onEmpty(); } From ae42dd1cea1d130299a8d54546dab4611a5c3d92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 15:57:58 +0300 Subject: [PATCH 077/122] Better doLoading locking logic --- dbms/src/Interpreters/ExternalLoader.cpp | 165 ++++++++++++++++------- 1 file changed, 113 insertions(+), 52 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5f0ccc9ae90..00d72f567de 100644 --- 
a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -20,6 +20,60 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + +/// RAII wrapper for LoadingDispatcher::doLoading() method. +/// Remove information about loading in destructor +struct LoadingIdsCleaner +{ + bool async; + std::mutex & mutex; + size_t loading_id; + std::unordered_map & loading_ids; + + LoadingIdsCleaner( + bool async_, + std::mutex & mutex_, + size_t loading_id_, + std::unordered_map & loading_ids_) + : async(async_) + , mutex(mutex_) + , loading_id(loading_id_) + , loading_ids(loading_ids_) + { + } + + ~LoadingIdsCleaner() + { + if (async) + { + std::lock_guard lock(mutex); + /// Remove the information about the thread after it finishes. + /// Should be done with lock + auto it = loading_ids.find(loading_id); + if (it != loading_ids.end()) + { + it->second.detach(); + loading_ids.erase(it); + } + } + } +}; + +/// Lock mutex only in async mode +/// In other case does nothing +struct LoadingGuardForAsyncLoad +{ + std::unique_lock lock; + LoadingGuardForAsyncLoad(bool async, std::mutex & mutex) + { + if (async) + lock = std::unique_lock(mutex); + } +}; + +} struct ExternalLoader::ObjectConfig { @@ -765,48 +819,15 @@ private: } } - /// Does the loading, possibly in the separate thread. - void doLoading(const String & name, size_t loading_id, bool async) + /// Load one object, returns object ptr or exception + /// Do not require locking + + std::pair loadOneObject( + const String & name, + const ObjectConfig & config, + bool config_changed, + LoadablePtr previous_version) { - std::unique_lock lock; - if (async) - { - setThreadName("ExterLdrJob"); - lock = std::unique_lock{mutex}; /// If `async == false` the mutex is already locked. - } - - SCOPE_EXIT({ - if (async) - { - if (!lock.owns_lock()) - lock.lock(); - /// Remove the information about the thread after it finishes. 
- auto it = loading_ids.find(loading_id); - if (it != loading_ids.end()) - { - it->second.detach(); - loading_ids.erase(it); - } - } - }); - - /// We check here if this is exactly the same loading as we planned to perform. - /// This check is necessary because the object could be removed or load with another config before this thread even starts. - Info * info = getInfo(name); - if (!info || !info->loading() || (info->loading_id != loading_id)) - return; - - ObjectConfig config = info->object_config; - bool config_changed = info->config_changed; - LoadablePtr previous_version = info->object; - size_t error_count = info->error_count; - - /// Use `create_function` to perform the actual loading. - /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time - /// and require access to other objects. - if (async) - lock.unlock(); - LoadablePtr new_object; std::exception_ptr new_exception; try @@ -817,16 +838,33 @@ private: { new_exception = std::current_exception(); } + return std::make_pair(new_object, new_exception); - if (!new_object && !new_exception) - throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + } - /// Lock the mutex again to store the changes. 
- if (async) - lock.lock(); - else if (new_exception) - std::rethrow_exception(new_exception); + /// Return single object info, checks loading_id and name + std::optional getSingleObjectInfo(const String & name, size_t loading_id, bool async) + { + LoadingGuardForAsyncLoad lock(async, mutex); + Info * info = getInfo(name); + if (!info || !info->loading() || (info->loading_id != loading_id)) + return {}; + return *info; + } + + /// Process loading result + /// Calculates next update time and process errors + void processLoadResult( + const String & name, + size_t loading_id, + LoadablePtr previous_version, + LoadablePtr new_object, + std::exception_ptr new_exception, + size_t error_count, + bool async) + { + LoadingGuardForAsyncLoad lock(async, mutex); /// Calculate a new update time. TimePoint next_update_time; try @@ -844,7 +882,7 @@ private: next_update_time = TimePoint::max(); } - info = getInfo(name); + Info * info = getInfo(name); /// And again we should check if this is still the same loading as we were doing. /// This is necessary because the object could be removed or load with another config while the `mutex` was unlocked. @@ -879,10 +917,33 @@ private: info->forced_to_reload = false; if (new_object) info->config_changed = false; + } - /// Notify `event` to recheck conditions in loadImpl() now. - if (async) - lock.unlock(); + + /// Does the loading, possibly in the separate thread. + void doLoading(const String & name, size_t loading_id, bool async) + { + /// We should clean loading_id when we finish, even in case of exceptions + LoadingIdsCleaner cleaner(async, mutex, loading_id, loading_ids); + + /// We check here if this is exactly the same loading as we planned to perform. + /// This check is necessary because the object could be removed or load with another config before this thread even starts. + std::optional info = getSingleObjectInfo(name, loading_id, async); + if (!info) + return; + + /// Use `create_function` to perform the actual loading. 
+ /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time + /// and require access to other objects. + auto [new_object, new_exception] = loadOneObject(name, info->object_config, info->config_changed, info->object); + if (!new_object && !new_exception) + throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + + /// In synchronus mode we throw exception immediately + if (!async && new_exception) + std::rethrow_exception(new_exception); + + processLoadResult(name, loading_id, info->object, new_object, new_exception, info->error_count, async); event.notify_all(); } From dc6090a2ac64d62c2bad861d94beaa5ff2829d97 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 16:41:17 +0300 Subject: [PATCH 078/122] More readable code in external loader --- dbms/src/Interpreters/ExternalLoader.cpp | 96 ++++++++++-------------- 1 file changed, 39 insertions(+), 57 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 00d72f567de..b5c3ed601ff 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -23,44 +23,6 @@ namespace ErrorCodes namespace { -/// RAII wrapper for LoadingDispatcher::doLoading() method. -/// Remove information about loading in destructor -struct LoadingIdsCleaner -{ - bool async; - std::mutex & mutex; - size_t loading_id; - std::unordered_map & loading_ids; - - LoadingIdsCleaner( - bool async_, - std::mutex & mutex_, - size_t loading_id_, - std::unordered_map & loading_ids_) - : async(async_) - , mutex(mutex_) - , loading_id(loading_id_) - , loading_ids(loading_ids_) - { - } - - ~LoadingIdsCleaner() - { - if (async) - { - std::lock_guard lock(mutex); - /// Remove the information about the thread after it finishes. 
- /// Should be done with lock - auto it = loading_ids.find(loading_id); - if (it != loading_ids.end()) - { - it->second.detach(); - loading_ids.erase(it); - } - } - } -}; - /// Lock mutex only in async mode /// In other case does nothing struct LoadingGuardForAsyncLoad @@ -853,6 +815,18 @@ private: return *info; } + /// Removes the loading_id entry from loading_ids if it is present, + /// otherwise does nothing. Should be done with the lock held. + void finishObjectLoading(size_t loading_id, const LoadingGuardForAsyncLoad &) + { + auto it = loading_ids.find(loading_id); + if (it != loading_ids.end()) + { + it->second.detach(); + loading_ids.erase(it); + } + } + /// Process loading result /// Calculates next update time and process errors void processLoadResult( @@ -917,34 +891,42 @@ private: info->forced_to_reload = false; if (new_object) info->config_changed = false; + + finishObjectLoading(loading_id, lock); } /// Does the loading, possibly in the separate thread. void doLoading(const String & name, size_t loading_id, bool async) { - /// We should clean loading_id when we finish, even in case of exceptions - LoadingIdsCleaner cleaner(async, mutex, loading_id, loading_ids); + try + { + /// We check here if this is exactly the same loading as we planned to perform. + /// This check is necessary because the object could be removed or load with another config before this thread even starts. + std::optional info = getSingleObjectInfo(name, loading_id, async); + if (!info) + return; - /// We check here if this is exactly the same loading as we planned to perform. - /// This check is necessary because the object could be removed or load with another config before this thread even starts. - std::optional info = getSingleObjectInfo(name, loading_id, async); - if (!info) - return; + /// Use `create_function` to perform the actual loading. + /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time + /// and require access to other objects. 
+ auto [new_object, new_exception] = loadOneObject(name, info->object_config, info->config_changed, info->object); + if (!new_object && !new_exception) + throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); - /// Use `create_function` to perform the actual loading. - /// It's much better to do it with `mutex` unlocked because the loading can take a lot of time - /// and require access to other objects. - auto [new_object, new_exception] = loadOneObject(name, info->object_config, info->config_changed, info->object); - if (!new_object && !new_exception) - throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + /// In synchronous mode we throw exception immediately + if (!async && new_exception) + std::rethrow_exception(new_exception); - /// In synchronus mode we throw exception immediately - if (!async && new_exception) - std::rethrow_exception(new_exception); - - processLoadResult(name, loading_id, info->object, new_object, new_exception, info->error_count, async); - event.notify_all(); + processLoadResult(name, loading_id, info->object, new_object, new_exception, info->error_count, async); + event.notify_all(); + } + catch (...) + { + LoadingGuardForAsyncLoad lock(async, mutex); + finishObjectLoading(loading_id, lock); + throw; + } } void cancelLoading(const String & name) From 9446fd2c4dcfd0f50b5befa12dceddb617dd7ca0 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 22 Oct 2019 17:45:01 +0300 Subject: [PATCH 079/122] Allowed to have some parts on destination disk or volume in MOVE PARTITION. 
#7424 --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index af985c02927..f530c537671 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2723,14 +2723,19 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & if (!disk) throw Exception("Disk " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK); - for (const auto & part : parts) - { - if (part->disk->getName() == disk->getName()) - throw Exception("Part " + part->name + " already on disk " + name, ErrorCodes::UNKNOWN_DISK); - } + parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { + return part_ptr->disk->getName() == disk->getName(); + }), parts.end()); - if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) - throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); + if (!parts.empty()) + { + if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) + throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); + } + else + { + LOG_DEBUG(log, "No parts of partition " << partition_id << " to move to disk " << disk->getName()); + } } @@ -2763,8 +2768,27 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String if (part->disk->getName() == disk->getName()) throw Exception("Part " + part->name + " already on volume '" + name + "'", ErrorCodes::UNKNOWN_DISK); - if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) - throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); + parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { + for (const auto & disk : volume->disks) + { + if (part_ptr->disk->getName() 
== disk->getName()) + { + return true; + } + } + return false; + }), parts.end()); + + + if (!parts.empty()) + { + if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) + throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); + } + else + { + LOG_DEBUG(log, "No parts of partition " << partition_id << " to move to volume " << volume->getName()); + } } From 71dd3a303e6117a93a086589e0db029a564ebb62 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Tue, 22 Oct 2019 18:31:56 +0300 Subject: [PATCH 080/122] fix result type for avg aggregation --- .../AggregateFunctions/AggregateFunctionAvg.h | 13 +-- .../src/AggregateFunctions/ReservoirSampler.h | 6 +- dbms/src/DataTypes/DataTypesDecimal.h | 11 +++ .../00700_decimal_empty_aggregates.reference | 80 +++++++++---------- .../00700_decimal_empty_aggregates.sql | 12 +-- 5 files changed, 68 insertions(+), 54 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 6700adf6b20..110b0b38839 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -33,7 +33,7 @@ struct AggregateFunctionAvgData if (count == 0) return static_cast(0); - return static_cast(sum) / count; + return static_cast(sum / count); } }; @@ -43,21 +43,23 @@ template class AggregateFunctionAvg final : public IAggregateFunctionDataHelper> { public: - using ResultType = std::conditional_t, Decimal128, Float64>; - using ResultDataType = std::conditional_t, DataTypeDecimal, DataTypeNumber>; + using ResultType = std::conditional_t, T, Float64>; + using ResultDataType = std::conditional_t, DataTypeDecimal, DataTypeNumber>; using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; - using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; /// ctor for native types 
AggregateFunctionAvg(const DataTypes & argument_types_) : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(0) + , precision(0) {} /// ctor for Decimals AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types_) : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(getDecimalScale(data_type)) + , precision(getDecimalPrecision(data_type)) {} String getName() const override { return "avg"; } @@ -65,7 +67,7 @@ public: DataTypePtr getReturnType() const override { if constexpr (IsDecimalNumber) - return std::make_shared(ResultDataType::maxPrecision(), scale); + return std::make_shared(precision, scale); else return std::make_shared(); } @@ -105,6 +107,7 @@ public: private: UInt32 scale; + UInt32 precision; }; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index ee0b02c3cda..33e31b8eddd 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -108,10 +108,10 @@ public: */ double quantileInterpolated(double level) { - if (samples.empty()) { - if (DB::IsDecimalNumber) { + if (samples.empty()) + { + if (DB::IsDecimalNumber) return 0; - } return onEmpty(); } sortIfNeeded(); diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h index e59a2b6e3fd..2626db5db8a 100644 --- a/dbms/src/DataTypes/DataTypesDecimal.h +++ b/dbms/src/DataTypes/DataTypesDecimal.h @@ -243,6 +243,17 @@ inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value return default_value; } +inline UInt32 getDecimalPrecision(const IDataType & data_type, UInt32 default_value = std::numeric_limits::max()) + { + if (auto * decimal_type = checkDecimal(data_type)) + return decimal_type->getPrecision(); + if (auto * decimal_type = checkDecimal(data_type)) + return decimal_type->getPrecision(); + if (auto * decimal_type = checkDecimal(data_type)) + return decimal_type->getPrecision(); + return 
default_value; + } + /// template constexpr bool IsDataTypeDecimal = false; diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference index adc33ffce18..80424ad73c1 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference @@ -1,52 +1,52 @@ 0 0 0 -[0.0000,0.0000] [0.00000000,0.00000000] [0.00000000,0.00000000] -0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 +[0.0000,0.0000] [0.0000000,0.0000000] [0.00000000,0.00000000] +0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) (0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0) 0 0 0 0 0 0 -0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 -0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 -0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 -0.0000 0.0000 0.00000000 0.00000000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 Decimal(38, 8) -0.0000 0.00000000 0.00000000 Decimal(38, 8) -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 
-0.0000 0.00000000 0.00000000 +0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 +0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 +0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 +0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 [0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] +[0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000] [0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] -[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] -0.0000 0.00000000 0.00000000 Decimal(38, 8) -0.0000 0.00000000 0.00000000 Decimal(38, 8) -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 [0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] +[0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000] [0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] -[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] -0.0000 0.00000000 0.00000000 Decimal(38, 8) -0.0000 0.00000000 
0.00000000 Decimal(38, 8) -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 -0.0000 0.00000000 0.00000000 +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 +0.0000 0.0000000 0.00000000 [0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000] -[0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] +[0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000,0.0000000] [0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000] inf inf inf Float64 Float64 Float64 nan nan nan diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql index 0b0a37c5229..ecc0609b780 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql @@ -2,9 +2,9 @@ DROP TABLE IF EXISTS decimal; CREATE TABLE decimal ( - a Decimal32(4), - b Decimal64(8), - c Decimal128(8) + a Decimal(6, 4), + b Decimal(16, 7), + c Decimal(20, 8) ) ENGINE = Memory; SELECT count(a), count(b), count(c) FROM decimal; @@ -16,9 +16,9 @@ SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOv SELECT sum(a+1), sum(b+1), sum(c+1), sumWithOverflow(a+1), sumWithOverflow(b+1), sumWithOverflow(c+1) FROM decimal; SELECT sum(a-1), sum(b-1), sum(c-1), sumWithOverflow(a-1), sumWithOverflow(b-1), sumWithOverflow(c-1) FROM decimal; -SELECT avg(a), avg(b), avg(c) FROM decimal; -SELECT avg(a), avg(b), avg(c) FROM decimal WHERE a > 0; -SELECT avg(a), avg(b), avg(c) 
FROM decimal WHERE a < 0; +SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal; +SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a > 0; +SELECT avg(a) as aa, avg(b) as ab, avg(c) as ac, toTypeName(aa), toTypeName(ab),toTypeName(ac) FROM decimal WHERE a < 0; SELECT (uniq(a), uniq(b), uniq(c)), (uniqCombined(a), uniqCombined(b), uniqCombined(c)), From 6d0dbf7e9554496082c6e69dfd058003d0166c1e Mon Sep 17 00:00:00 2001 From: akonyaev Date: Tue, 22 Oct 2019 18:51:34 +0300 Subject: [PATCH 081/122] add test returned datatype from quantile and median result for empty decimal data --- .../0_stateless/00700_decimal_empty_aggregates.reference | 4 ++-- .../queries/0_stateless/00700_decimal_empty_aggregates.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference index 80424ad73c1..a52fc64e29d 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference @@ -15,8 +15,8 @@ 0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000 -0.0000 0.0000000 0.00000000 Decimal(20, 8) -0.0000 0.0000000 0.00000000 Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql index ecc0609b780..2d14ffae49d 100644 --- 
a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.sql @@ -35,8 +35,8 @@ SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), arg SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal; SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal WHERE a < 0; -SELECT median(a), median(b), median(c) as x, toTypeName(x) FROM decimal; -SELECT quantile(a), quantile(b), quantile(c) as x, toTypeName(x) FROM decimal WHERE a < 0; +SELECT median(a) as ma, median(b) as mb, median(c) as mc, toTypeName(ma),toTypeName(mb),toTypeName(mc) FROM decimal; +SELECT quantile(a) as qa, quantile(b) as qb, quantile(c) as qc, toTypeName(qa),toTypeName(qb),toTypeName(qc) FROM decimal WHERE a < 0; SELECT quantile(0.0)(a), quantile(0.0)(b), quantile(0.0)(c) FROM decimal WHERE a >= 0; SELECT quantile(0.2)(a), quantile(0.2)(b), quantile(0.2)(c) FROM decimal WHERE a >= 0; SELECT quantile(0.4)(a), quantile(0.4)(b), quantile(0.4)(c) FROM decimal WHERE a >= 0; From dd72f62f179916adf1cecdbcb9a183d0ee92a0f2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 22 Oct 2019 23:55:11 +0800 Subject: [PATCH 082/122] add perf test for subqueries with large scalars --- dbms/tests/performance/scalar.xml | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 dbms/tests/performance/scalar.xml diff --git a/dbms/tests/performance/scalar.xml b/dbms/tests/performance/scalar.xml new file mode 100644 index 00000000000..bb8044685d3 --- /dev/null +++ b/dbms/tests/performance/scalar.xml @@ -0,0 +1,35 @@ + + loop + + + + 30000 + + + 5000 + 60000 + + + + + + + + CREATE TABLE cdp_tags (tag_id String, mid_seqs AggregateFunction(groupBitmap, UInt32)) engine=MergeTree() ORDER BY (tag_id) SETTINGS index_granularity=1 + CREATE TABLE cdp_orders(order_id UInt64, order_complete_time DateTime, 
order_total_sales Float32, mid_seq UInt32) engine=MergeTree() PARTITION BY toYYYYMMDD(order_complete_time) ORDER BY (order_complete_time, order_id) + + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag1', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%9=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag2', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%8=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag3', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%7=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag4', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%6=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag5', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%5=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag6', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%4=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag7', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%3=0 + INSERT INTO cdp_tags(tag_id, mid_seqs) SELECT 'tag8', groupBitmapState(toUInt32(number)) FROM numbers(10000000) WHERE number%2=0 + INSERT INTO cdp_orders(order_id, order_complete_time, order_total_sales, mid_seq) SELECT number, addSeconds(toDateTime('2000-01-01 00:00:00'), number), number%1024, toUInt32(number) FROM numbers(10000000) + + WITH (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag1') AS bm1, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag2') AS bm2, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag3') AS bm3, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag4') AS bm4, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag5') AS bm5, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag6') AS bm6, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag7') AS bm7, (SELECT mid_seqs FROM cdp_tags WHERE tag_id='tag8') AS bm8, toDateTime('2000-01-01 00:00:00') AS ts_begin, addSeconds(toDateTime('2000-01-01 00:00:00'), 1e8) AS 
ts_end SELECT multiIf(bitmapContains(bm1, mid_seq), 1, bitmapContains(bm2, mid_seq), 2, bitmapContains(bm3, mid_seq), 3, bitmapContains(bm4, mid_seq), 4, bitmapContains(bm5, mid_seq), 5, bitmapContains(bm6, mid_seq), 6, bitmapContains(bm7, mid_seq), 7, bitmapContains(bm8, mid_seq), 8, 0) AS tag, count() AS gc, sum(order_total_sales) AS total FROM cdp_orders PREWHERE order_complete_time >= ts_begin AND order_complete_time < ts_end GROUP BY tag ORDER BY tag + + DROP TABLE IF EXISTS cdp_tags + DROP TABLE IF EXISTS cdp_orders + From dfa9b0c1498aa575275704ad9190abddba0b742a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 19:26:15 +0300 Subject: [PATCH 083/122] Remove complex logic with lazy load --- dbms/src/Databases/DatabaseOrdinary.cpp | 2 ++ dbms/src/Databases/DatabasesCommon.cpp | 10 ++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 0a5a58c1b8f..a50ad4615e4 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -179,6 +179,8 @@ void DatabaseOrdinary::loadStoredObjects( auto dictionaries_repository = std::make_unique(shared_from_this(), context); auto & external_loader = context.getExternalDictionariesLoader(); external_loader.addConfigRepository(getDatabaseName(), std::move(dictionaries_repository)); + bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); + external_loader.reload(!lazy_load); } diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 9b3b92ad765..1f1a1a7a004 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -172,10 +172,7 @@ void DatabaseWithOwnTablesBase::detachDictionary(const String & dictionary_name, } if (reload) - { - bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); - 
context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, !lazy_load); - } + context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name); } @@ -196,10 +193,7 @@ void DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, } if (load) - { - bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); - context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, !lazy_load); - } + context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, true); } void DatabaseWithOwnTablesBase::shutdown() From c12014ca15285f17a2b39a2f961af365c9ceac53 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Oct 2019 19:47:11 +0300 Subject: [PATCH 084/122] Fix shared build --- dbms/src/Dictionaries/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index 51ec9289ae6..027aebc9653 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -11,8 +11,8 @@ generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) -list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp) -list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h) +list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp getDictionaryConfigurationFromAST.cpp) +list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h 
getDictionaryConfigurationFromAST.h) add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources}) target_link_libraries(clickhouse_dictionaries PRIVATE dbms clickhouse_common_io ${BTRIE_LIBRARIES}) From a413770a9785c11e7687041323497a2804f168fc Mon Sep 17 00:00:00 2001 From: Colum Date: Tue, 22 Oct 2019 10:02:51 -0700 Subject: [PATCH 085/122] Fixed spelling error in error message --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index af985c02927..082bc038a36 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3043,7 +3043,7 @@ DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) if (reservation) return reservation; - throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(expected_size) + ", not enought space.", + throw Exception("Cannot reserve " + formatReadableSizeWithBinarySuffix(expected_size) + ", not enough space.", ErrorCodes::NOT_ENOUGH_SPACE); } From 7426542b8b5689238b9ca0b43bebea6d8bfb42c3 Mon Sep 17 00:00:00 2001 From: akonyaev Date: Wed, 23 Oct 2019 11:22:51 +0300 Subject: [PATCH 086/122] up precision for avg result to max of type --- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 5 +---- dbms/src/DataTypes/DataTypesDecimal.h | 11 ----------- .../00700_decimal_empty_aggregates.reference | 6 +++--- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 110b0b38839..13aa9157706 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -52,14 +52,12 @@ public: AggregateFunctionAvg(const DataTypes & argument_types_) : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(0) - , precision(0) {} /// 
ctor for Decimals AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types_) : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(getDecimalScale(data_type)) - , precision(getDecimalPrecision(data_type)) {} String getName() const override { return "avg"; } @@ -67,7 +65,7 @@ public: DataTypePtr getReturnType() const override { if constexpr (IsDecimalNumber) - return std::make_shared(precision, scale); + return std::make_shared(ResultDataType::maxPrecision(), scale); else return std::make_shared(); } @@ -107,7 +105,6 @@ public: private: UInt32 scale; - UInt32 precision; }; diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h index 2626db5db8a..e59a2b6e3fd 100644 --- a/dbms/src/DataTypes/DataTypesDecimal.h +++ b/dbms/src/DataTypes/DataTypesDecimal.h @@ -243,17 +243,6 @@ inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value return default_value; } -inline UInt32 getDecimalPrecision(const IDataType & data_type, UInt32 default_value = std::numeric_limits::max()) - { - if (auto * decimal_type = checkDecimal(data_type)) - return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal(data_type)) - return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal(data_type)) - return decimal_type->getPrecision(); - return default_value; - } - /// template constexpr bool IsDataTypeDecimal = false; diff --git a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference index a52fc64e29d..580cf0e26b7 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_empty_aggregates.reference @@ -5,9 +5,9 @@ 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000 -0.0000 0.0000000 0.00000000 
Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) -0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) -0.0000 0.0000000 0.00000000 Decimal(6, 4) Decimal(16, 7) Decimal(20, 8) +0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8) +0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8) +0.0000 0.0000000 0.00000000 Decimal(9, 4) Decimal(18, 7) Decimal(38, 8) (0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0) 0 0 0 0 0 0 From fb349757ba5dddc77f7ee7eaba2b7f1f4a4e9e2a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Oct 2019 12:27:34 +0300 Subject: [PATCH 087/122] Fix ubsan error --- dbms/src/Interpreters/ExternalLoader.cpp | 7 ++++--- dbms/src/Interpreters/IExternalLoadable.cpp | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index b5c3ed601ff..c57693ce374 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -856,6 +856,10 @@ private: next_update_time = TimePoint::max(); } + /// In synchronus mode we throw exception immediately + if (!async && new_exception) + std::rethrow_exception(new_exception); + Info * info = getInfo(name); /// And again we should check if this is still the same loading as we were doing. 
@@ -914,9 +918,6 @@ private: if (!new_object && !new_exception) throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); - /// In synchronus mode we throw exception immediately - if (!async && new_exception) - std::rethrow_exception(new_exception); processLoadResult(name, loading_id, info->object, new_object, new_exception, info->error_count, async); event.notify_all(); diff --git a/dbms/src/Interpreters/IExternalLoadable.cpp b/dbms/src/Interpreters/IExternalLoadable.cpp index 18439cf999f..5c2df092179 100644 --- a/dbms/src/Interpreters/IExternalLoadable.cpp +++ b/dbms/src/Interpreters/IExternalLoadable.cpp @@ -24,6 +24,11 @@ UInt64 calculateDurationWithBackoff(pcg64 & rnd_engine, size_t error_count) if (error_count < 1) error_count = 1; + + /// max seconds is 600 and 2 ** 10 == 1024 + if (error_count > 11) + error_count = 11; + std::uniform_int_distribution distribution(0, static_cast(std::exp2(error_count - 1))); return std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine)); } From 0abb2e538bc95aeeeb230fb19e006a47f1327642 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Oct 2019 12:36:20 +0300 Subject: [PATCH 088/122] Remove strange logic --- dbms/src/Interpreters/ExternalLoader.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index c57693ce374..5a96457bd83 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -856,9 +856,6 @@ private: next_update_time = TimePoint::max(); } - /// In synchronus mode we throw exception immediately - if (!async && new_exception) - std::rethrow_exception(new_exception); Info * info = getInfo(name); @@ -867,7 +864,7 @@ private: if (!info || !info->loading() || (info->loading_id != loading_id)) return; - if (new_exception) + if (new_exception && async) { auto next_update_time_description = [next_update_time] 
{ From 6a0246f58ef03ed836ccffb6fb5d2ca5fb380cac Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Oct 2019 12:40:09 +0300 Subject: [PATCH 089/122] Fix if --- dbms/src/Interpreters/ExternalLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5a96457bd83..0320a1b691a 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -864,7 +864,7 @@ private: if (!info || !info->loading() || (info->loading_id != loading_id)) return; - if (new_exception && async) + if (new_exception) { auto next_update_time_description = [next_update_time] { From 04a6c6ac4d5b492b57cc7d2d0fbe87e94019e82d Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 23 Oct 2019 13:51:06 +0300 Subject: [PATCH 090/122] Docs links fix (#7448) * Typo fix. * Links fix. * Fixed links in docs. * More fixes. * Link fixes. * Fixed links. --- docs/en/operations/system_tables.md | 5 +++++ docs/en/query_language/alter.md | 2 ++ 2 files changed, 7 insertions(+) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 47bbf0266ac..de0c277f100 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -757,4 +757,9 @@ If there were problems with mutating some parts, the following columns contain a **latest_fail_reason** - The exception message that caused the most recent part mutation failure. 
+ +## system.disks {#system_tables-disks} + +## system.storage_policies {#system_tables-storage_policies} + [Original article](https://clickhouse.yandex/docs/en/operations/system_tables/) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 5c1d6331add..b7b37924c71 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -355,6 +355,8 @@ Before downloading, the system checks if the partition exists and the table stru Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table. +#### MOVE PARTITION|PART {#alter_move-partition} + #### How To Set Partition Expression {#alter-how-to-specify-part-expr} You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: From 7546c315a6089069c2864c77d575b4338a84a97b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 23 Oct 2019 14:25:51 +0300 Subject: [PATCH 091/122] Added integration test for #7414. 
--- .../config.d/storage_configuration.xml | 21 +++++++++++++++ .../integration/test_multiple_disks/test.py | 27 +++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_multiple_disks/configs/config.d/storage_configuration.xml b/dbms/tests/integration/test_multiple_disks/configs/config.d/storage_configuration.xml index d41ba6066c4..2e6a1f80a6d 100644 --- a/dbms/tests/integration/test_multiple_disks/configs/config.d/storage_configuration.xml +++ b/dbms/tests/integration/test_multiple_disks/configs/config.d/storage_configuration.xml @@ -74,6 +74,27 @@ + + + + + default + 0 + + + external + + + jbod1 + 1024 + + + jbod2 + 1024000000 + + + + diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index 4ee337229c9..d0e2cb6ef19 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -1,8 +1,9 @@ -import time +import json import pytest import random +import re import string -import json +import time from multiprocessing.dummy import Pool from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster @@ -193,6 +194,28 @@ def get_random_string(length): def get_used_disks_for_table(node, table_name): return node.query("select disk_name from system.parts where table == '{}' and active=1 order by modification_time".format(table_name)).strip().split('\n') +def test_no_warning_about_zero_max_data_part_size(start_cluster): + def get_log(node): + return node.exec_in_container(["bash", "-c", "cat /var/log/clickhouse-server/clickhouse-server.log"]) + + for node in (node1, node2): + node.query(""" + CREATE TABLE default.test_warning_table ( + s String + ) ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy='small_jbod_with_external' + """) + node.query(""" + DROP TABLE default.test_warning_table + """) + log = get_log(node) + assert not 
re.search("Warning.*Volume.*special_warning_zero_volume", log) + assert not re.search("Warning.*Volume.*special_warning_default_volume", log) + assert re.search("Warning.*Volume.*special_warning_small_volume", log) + assert not re.search("Warning.*Volume.*special_warning_big_volume", log) + + @pytest.mark.parametrize("name,engine", [ ("mt_on_jbod","MergeTree()"), ("replicated_mt_on_jbod","ReplicatedMergeTree('/clickhouse/replicated_mt_on_jbod', '1')",), From c3519ff3762c38f34aa3d51f4ed9fdb0044ae2eb Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Oct 2019 16:02:40 +0300 Subject: [PATCH 092/122] Better check of dictionary lifetime for updates --- dbms/src/Interpreters/ExternalLoader.cpp | 16 +++++++++++---- .../01018_ddl_dictionaries_select.reference | 2 ++ .../01018_ddl_dictionaries_select.sql | 20 +++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 0320a1b691a..6486b394623 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -848,7 +848,11 @@ private: else error_count = 0; - next_update_time = calculateNextUpdateTime(new_object, error_count); + LoadablePtr object = previous_version; + if (new_object) + object = new_object; + + next_update_time = calculateNextUpdateTime(object, error_count); } catch (...) 
{ @@ -963,7 +967,8 @@ private: TimePoint calculateNextUpdateTime(const LoadablePtr & loaded_object, size_t error_count) const { static constexpr auto never = TimePoint::max(); - if (!error_count) + + if (loaded_object) { if (!loaded_object->supportUpdates()) return never; @@ -973,8 +978,11 @@ private: if (lifetime.min_sec == 0 || lifetime.max_sec == 0) return never; - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - return std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + if (!error_count) + { + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + return std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + } } return std::chrono::system_clock::now() + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count)); diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index 5afa7f27963..a7f56f9ee03 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -15,3 +15,5 @@ dict2 Dictionary table_for_dict MergeTree database_for_dict dict1 ComplexKeyCache database_for_dict dict2 Hashed +6 +6 diff --git a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 19f2d1244a0..cb63ce86ada 100644 --- a/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/dbms/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -95,4 +95,24 @@ SELECT name, engine FROM system.tables WHERE database = 'database_for_dict' ORDE SELECT database, name, type FROM system.dictionaries WHERE database = 'database_for_dict' ORDER BY name; +-- check dictionary will not update +CREATE DICTIONARY database_for_dict.dict3 +( + key_column UInt64 DEFAULT 0, + 
some_column String EXPRESSION toString(fourth_column), + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +LIFETIME(0) +LAYOUT(HASHED()); + +SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); + +DROP TABLE database_for_dict.table_for_dict; + +SYSTEM RELOAD DICTIONARIES; + +SELECT dictGetString('database_for_dict.dict3', 'some_column', toUInt64(12)); + DROP DATABASE IF EXISTS database_for_dict; From 295864e6e0857e49fc163debe5b47d1b1b395a35 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 20 Oct 2019 04:36:35 +0800 Subject: [PATCH 093/122] better scalar query --- dbms/programs/benchmark/Benchmark.cpp | 2 +- dbms/programs/server/TCPHandler.cpp | 52 ++++---- dbms/programs/server/TCPHandler.h | 2 +- dbms/src/Client/Connection.cpp | 46 ++++++- dbms/src/Client/Connection.h | 4 +- dbms/src/Client/MultiplexedConnections.cpp | 15 +++ dbms/src/Client/MultiplexedConnections.h | 2 + dbms/src/Common/CurrentMetrics.cpp | 1 + dbms/src/Common/ErrorCodes.cpp | 2 + dbms/src/Core/Protocol.h | 3 +- dbms/src/Core/Settings.h | 2 + .../DataStreams/RemoteBlockInputStream.cpp | 19 ++- dbms/src/DataStreams/RemoteBlockInputStream.h | 11 +- dbms/src/Functions/getScalar.cpp | 68 ++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + .../ClusterProxy/SelectStreamFactory.cpp | 13 +- .../ClusterProxy/SelectStreamFactory.h | 3 + dbms/src/Interpreters/Context.cpp | 30 +++++ dbms/src/Interpreters/Context.h | 8 ++ .../ExecuteScalarSubqueriesVisitor.cpp | 122 +++++++++++------- .../ExecuteScalarSubqueriesVisitor.h | 2 + .../Interpreters/InterpreterSelectQuery.cpp | 6 + dbms/src/Interpreters/SyntaxAnalyzer.cpp | 6 +- dbms/src/Interpreters/SyntaxAnalyzer.h | 6 + dbms/src/Storages/StorageDistributed.cpp | 6 +- 25 files changed, 341 insertions(+), 92 deletions(-) create mode 100644 dbms/src/Functions/getScalar.cpp diff --git 
a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 195f8c01270..c0b2eccfd29 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -365,7 +365,7 @@ private: Stopwatch watch; RemoteBlockInputStream stream( *(*connection_entries[connection_index]), - query, {}, global_context, &settings, nullptr, Tables(), query_processing_stage); + query, {}, global_context, &settings, nullptr, Scalars(), Tables(), query_processing_stage); Progress progress; stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 83749975f34..88b1eb6ae3e 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -850,9 +850,10 @@ bool TCPHandler::receivePacket() return true; case Protocol::Client::Data: + case Protocol::Client::Scalar: if (state.empty()) receiveUnexpectedData(); - return receiveData(); + return receiveData(packet_type == Protocol::Client::Scalar); case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); @@ -957,39 +958,44 @@ void TCPHandler::receiveUnexpectedQuery() throw NetException("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); } -bool TCPHandler::receiveData() +bool TCPHandler::receiveData(bool scalar) { initBlockInput(); /// The name of the temporary table for writing data, default to empty string - String external_table_name; - readStringBinary(external_table_name, *in); + String name; + readStringBinary(name, *in); /// Read one block from the network and write it down Block block = state.block_in->read(); if (block) { - /// If there is an insert request, then the data should be written directly to `state.io.out`. - /// Otherwise, we write the blocks in the temporary `external_table_name` table. 
- if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input) - { - StoragePtr storage; - /// If such a table does not exist, create it. - if (!(storage = query_context->tryGetExternalTable(external_table_name))) - { - NamesAndTypesList columns = block.getNamesAndTypesList(); - storage = StorageMemory::create("_external", external_table_name, ColumnsDescription{columns}, ConstraintsDescription{}); - storage->startup(); - query_context->addExternalTable(external_table_name, storage); - } - /// The data will be written directly to the table. - state.io.out = storage->write(ASTPtr(), *query_context); - } - if (state.need_receive_data_for_input) - state.block_for_input = block; + if (scalar) + query_context->addScalar(name, block); else - state.io.out->write(block); + { + /// If there is an insert request, then the data should be written directly to `state.io.out`. + /// Otherwise, we write the blocks in the temporary `external_table_name` table. + if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input) + { + StoragePtr storage; + /// If such a table does not exist, create it. + if (!(storage = query_context->tryGetExternalTable(name))) + { + NamesAndTypesList columns = block.getNamesAndTypesList(); + storage = StorageMemory::create("_external", name, ColumnsDescription{columns}, ConstraintsDescription{}); + storage->startup(); + query_context->addExternalTable(name, storage); + } + /// The data will be written directly to the table. 
+ state.io.out = storage->write(ASTPtr(), *query_context); + } + if (state.need_receive_data_for_input) + state.block_for_input = block; + else + state.io.out->write(block); + } return true; } else diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index 834d66bba28..561ed4d0eca 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -153,7 +153,7 @@ private: void receiveHello(); bool receivePacket(); void receiveQuery(); - bool receiveData(); + bool receiveData(bool scalar); bool readDataNext(const size_t & poll_interval, const int & receive_timeout); void readData(const Settings & global_settings); std::tuple getReadTimeouts(const Settings & global_settings); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index a6e533d8dd2..95dff73f870 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -30,6 +30,7 @@ namespace CurrentMetrics { + extern const Metric SendScalars; extern const Metric SendExternalTables; } @@ -441,7 +442,7 @@ void Connection::sendCancel() } -void Connection::sendData(const Block & block, const String & name) +void Connection::sendData(const Block & block, const String & name, bool scalar) { //LOG_TRACE(log_wrapper.get(), "Sending data"); @@ -455,7 +456,10 @@ void Connection::sendData(const Block & block, const String & name) block_out = std::make_shared(*maybe_compressed_out, server_revision, block.cloneEmpty()); } - writeVarUInt(Protocol::Client::Data, *out); + if (scalar) + writeVarUInt(Protocol::Client::Scalar, *out); + else + writeVarUInt(Protocol::Client::Data, *out); writeStringBinary(name, *out); size_t prev_bytes = out->count(); @@ -484,6 +488,44 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String } +void Connection::sendScalarsData(Scalars & data) +{ + if (data.empty()) + return; + + Stopwatch watch; + size_t out_bytes = out ? 
out->count() : 0; + size_t maybe_compressed_out_bytes = maybe_compressed_out ? maybe_compressed_out->count() : 0; + size_t rows = 0; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::SendScalars}; + + for (auto & elem : data) + { + rows += elem.second.rows(); + sendData(elem.second, elem.first, true /* scalar */); + } + + out_bytes = out->count() - out_bytes; + maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; + double elapsed = watch.elapsedSeconds(); + + std::stringstream msg; + msg << std::fixed << std::setprecision(3); + msg << "Sent data for " << data.size() << " scalars, total " << rows << " rows in " << elapsed << " sec., " + << static_cast(rows / watch.elapsedSeconds()) << " rows/sec., " + << maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"; + + if (compression == Protocol::Compression::Enable) + msg << ", compressed " << static_cast(maybe_compressed_out_bytes) / out_bytes << " times to " + << out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"; + else + msg << ", no compression."; + + LOG_DEBUG(log_wrapper.get(), msg.rdbuf()); +} + + void Connection::sendExternalTablesData(ExternalTablesData & data) { if (data.empty()) diff --git a/dbms/src/Client/Connection.h b/dbms/src/Client/Connection.h index 03a771c257f..8b507a4172a 100644 --- a/dbms/src/Client/Connection.h +++ b/dbms/src/Client/Connection.h @@ -133,7 +133,9 @@ public: void sendCancel(); /// Send block of data; if name is specified, server will write it to external (temporary) table of that name. - void sendData(const Block & block, const String & name = ""); + void sendData(const Block & block, const String & name = "", bool scalar = false); + /// Send all scalars. + void sendScalarsData(Scalars & data); /// Send all contents of external (temporary) tables. 
void sendExternalTablesData(ExternalTablesData & data); diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp index 5c05ee9c5f5..d7934924242 100644 --- a/dbms/src/Client/MultiplexedConnections.cpp +++ b/dbms/src/Client/MultiplexedConnections.cpp @@ -51,6 +51,21 @@ MultiplexedConnections::MultiplexedConnections( active_connection_count = connections.size(); } +void MultiplexedConnections::sendScalarsData(Scalars & data) +{ + std::lock_guard lock(cancel_mutex); + + if (!sent_query) + throw Exception("Cannot send scalars data: query not yet sent.", ErrorCodes::LOGICAL_ERROR); + + for (ReplicaState & state : replica_states) + { + Connection * connection = state.connection; + if (connection != nullptr) + connection->sendScalarsData(data); + } +} + void MultiplexedConnections::sendExternalTablesData(std::vector & data) { std::lock_guard lock(cancel_mutex); diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h index b8567dcd979..b26c9569422 100644 --- a/dbms/src/Client/MultiplexedConnections.h +++ b/dbms/src/Client/MultiplexedConnections.h @@ -27,6 +27,8 @@ public: std::vector && connections, const Settings & settings_, const ThrottlerPtr & throttler_); + /// Send all scalars to replicas. + void sendScalarsData(Scalars & data); /// Send all content of external tables to replicas. void sendExternalTablesData(std::vector & data); diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index 6bd99fb8f01..5ad4a281f80 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -21,6 +21,7 @@ M(OpenFileForWrite, "Number of files open for writing") \ M(Read, "Number of read (read, pread, io_getevents, etc.) syscalls in fly") \ M(Write, "Number of write (write, pwrite, io_getevents, etc.) 
syscalls in fly") \ + M(SendScalars, "Number of connections that are sending data for scalars to remote servers.") \ M(SendExternalTables, "Number of connections that are sending data for external tables to remote servers. External tables are used to implement GLOBAL IN and GLOBAL JOIN operators with distributed subqueries.") \ M(QueryThread, "Number of query processing threads") \ M(ReadonlyReplica, "Number of Replicated tables that are currently in readonly state due to re-initialization after ZooKeeper session loss or due to startup without ZooKeeper configured.") \ diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index cfa89af96d4..3086e25680d 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -459,6 +459,8 @@ namespace ErrorCodes extern const int DICTIONARY_ACCESS_DENIED = 482; extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; + extern const int SCALAR_ALREADY_EXISTS = 485; + extern const int UNKNOWN_SCALAR = 486; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index b50d018f9ce..1992234b3fe 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -112,7 +112,8 @@ namespace Protocol Cancel = 3, /// Cancel the query execution. Ping = 4, /// Check that connection to the server is alive. TablesStatusRequest = 5, /// Check status of tables on the server. - KeepAlive = 6 /// Keep the connection alive + KeepAlive = 6, /// Keep the connection alive + Scalar = 7 /// A block of data (compressed or not). 
}; inline const char * toString(UInt64 packet) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 30752113a6b..9361b909590 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -379,6 +379,8 @@ struct Settings : public SettingsCollection M(SettingUInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.") \ M(SettingUInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.") \ \ + M(SettingBool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.") \ + \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. 
Will be removed after 2019-08-13") \ diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp index 9e9d47f9516..61432939a95 100644 --- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp @@ -23,8 +23,8 @@ namespace ErrorCodes RemoteBlockInputStream::RemoteBlockInputStream( Connection & connection, const String & query_, const Block & header_, const Context & context_, const Settings * settings, - const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), external_tables(external_tables_), stage(stage_) + const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) + : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) { if (settings) context.setSettings(*settings); @@ -38,8 +38,8 @@ RemoteBlockInputStream::RemoteBlockInputStream( RemoteBlockInputStream::RemoteBlockInputStream( std::vector && connections, const String & query_, const Block & header_, const Context & context_, const Settings * settings, - const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), external_tables(external_tables_), stage(stage_) + const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) + : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) { if (settings) context.setSettings(*settings); @@ -54,8 +54,8 @@ RemoteBlockInputStream::RemoteBlockInputStream( RemoteBlockInputStream::RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, const Context & context_, const 
Settings * settings, - const ThrottlerPtr & throttler, const Tables & external_tables_, QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), external_tables(external_tables_), stage(stage_) + const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, QueryProcessingStage::Enum stage_) + : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) { if (settings) context.setSettings(*settings); @@ -120,6 +120,11 @@ void RemoteBlockInputStream::cancel(bool kill) tryCancel("Cancelling query"); } +void RemoteBlockInputStream::sendScalars() +{ + multiplexed_connections->sendScalarsData(scalars); +} + void RemoteBlockInputStream::sendExternalTables() { size_t count = multiplexed_connections->size(); @@ -308,6 +313,8 @@ void RemoteBlockInputStream::sendQuery() established = false; sent_query = true; + if (settings.enable_scalar_subquery_optimization) + sendScalars(); sendExternalTables(); } diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h index af8d79c324c..89f4e84f080 100644 --- a/dbms/src/DataStreams/RemoteBlockInputStream.h +++ b/dbms/src/DataStreams/RemoteBlockInputStream.h @@ -25,7 +25,7 @@ public: RemoteBlockInputStream( Connection & connection, const String & query_, const Block & header_, const Context & context_, const Settings * settings = nullptr, - const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(), + const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); /// Accepts several connections already taken from pool. 
@@ -33,7 +33,7 @@ public: RemoteBlockInputStream( std::vector && connections, const String & query_, const Block & header_, const Context & context_, const Settings * settings = nullptr, - const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(), + const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); /// Takes a pool and gets one or several connections from it. @@ -41,7 +41,7 @@ public: RemoteBlockInputStream( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, const Context & context_, const Settings * settings = nullptr, - const ThrottlerPtr & throttler = nullptr, const Tables & external_tables_ = Tables(), + const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); ~RemoteBlockInputStream() override; @@ -71,6 +71,9 @@ public: Block getHeader() const override { return header; } protected: + /// Send all scalars to remote servers + void sendScalars(); + /// Send all temporary tables to remote servers void sendExternalTables(); @@ -103,6 +106,8 @@ private: String query_id = ""; Context context; + /// Scalars needed to be sent to remote servers + Scalars scalars; /// Temporary tables needed to be sent to remote servers Tables external_tables; QueryProcessingStage::Enum stage; diff --git a/dbms/src/Functions/getScalar.cpp b/dbms/src/Functions/getScalar.cpp new file mode 100644 index 00000000000..b04fcdc83f5 --- /dev/null +++ b/dbms/src/Functions/getScalar.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +/** Get scalar value of sub queries from query 
context via IAST::Hash. + */ +class FunctionGetScalar : public IFunction +{ +public: + static constexpr auto name = "__getScalar"; + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + + FunctionGetScalar(const Context & context_) : context(context_) {} + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 1; + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1 || !isString(arguments[0].type) || !isColumnConst(*arguments[0].column)) + throw Exception("Function " + getName() + " accepts one const string argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + auto scalar_name = assert_cast(*arguments[0].column).getField().get(); + scalar = context.getScalar(scalar_name).getByPosition(0); + return scalar.type; + } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = ColumnConst::create(scalar.column, input_rows_count); + } + +private: + mutable ColumnWithTypeAndName scalar; + const Context & context; +}; + + +void registerFunctionGetScalar(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index 3c0e03e46c3..ae75b9c0962 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -52,6 +52,7 @@ void registerFunctionEvalMLMethod(FunctionFactory &); void registerFunctionBasename(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); void registerFunctionGetMacro(FunctionFactory &); +void registerFunctionGetScalar(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -106,6 +107,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & 
factory) registerFunctionBasename(factory); registerFunctionTransform(factory); registerFunctionGetMacro(factory); + registerFunctionGetScalar(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 905827205b4..3c141b56152 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -33,11 +33,13 @@ SelectStreamFactory::SelectStreamFactory( const Block & header_, QueryProcessingStage::Enum processed_stage_, QualifiedTableName main_table_, + const Scalars & scalars_, const Tables & external_tables_) : header(header_), processed_stage{processed_stage_}, main_table(std::move(main_table_)), table_func_ptr{nullptr}, + scalars{scalars_}, external_tables{external_tables_} { } @@ -46,10 +48,12 @@ SelectStreamFactory::SelectStreamFactory( const Block & header_, QueryProcessingStage::Enum processed_stage_, ASTPtr table_func_ptr_, + const Scalars & scalars_, const Tables & external_tables_) : header(header_), processed_stage{processed_stage_}, table_func_ptr{table_func_ptr_}, + scalars{scalars_}, external_tables{external_tables_} { } @@ -92,7 +96,8 @@ void SelectStreamFactory::createForShard( auto emplace_remote_stream = [&]() { - auto stream = std::make_shared(shard_info.pool, query, header, context, nullptr, throttler, external_tables, processed_stage); + auto stream = std::make_shared( + shard_info.pool, query, header, context, nullptr, throttler, scalars, external_tables, processed_stage); stream->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) stream->setMainTable(main_table); @@ -190,8 +195,8 @@ void SelectStreamFactory::createForShard( auto lazily_create_stream = [ pool = shard_info.pool, shard_num = shard_info.shard_num, query, header = header, query_ast, context, throttler, - main_table = main_table, table_func_ptr = table_func_ptr, 
external_tables = external_tables, stage = processed_stage, - local_delay]() + main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, + stage = processed_stage, local_delay]() -> BlockInputStreamPtr { auto current_settings = context.getSettingsRef(); @@ -233,7 +238,7 @@ void SelectStreamFactory::createForShard( connections.emplace_back(std::move(try_result.entry)); return std::make_shared( - std::move(connections), query, header, context, nullptr, throttler, external_tables, stage); + std::move(connections), query, header, context, nullptr, throttler, scalars, external_tables, stage); } }; diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 20bac52d393..c3a55f5348f 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -18,6 +18,7 @@ public: const Block & header_, QueryProcessingStage::Enum processed_stage_, QualifiedTableName main_table_, + const Scalars & scalars_, const Tables & external_tables); /// TableFunction in a query. 
@@ -25,6 +26,7 @@ public: const Block & header_, QueryProcessingStage::Enum processed_stage_, ASTPtr table_func_ptr_, + const Scalars & scalars_, const Tables & external_tables_); void createForShard( @@ -38,6 +40,7 @@ private: QueryProcessingStage::Enum processed_stage; QualifiedTableName main_table; ASTPtr table_func_ptr; + Scalars scalars; Tables external_tables; }; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index fef48898828..6783e68d70a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -88,6 +88,8 @@ namespace ErrorCodes extern const int SESSION_IS_LOCKED; extern const int CANNOT_GET_CREATE_TABLE_QUERY; extern const int LOGICAL_ERROR; + extern const int SCALAR_ALREADY_EXISTS; + extern const int UNKNOWN_SCALAR; } @@ -862,6 +864,21 @@ void Context::assertDatabaseDoesntExist(const String & database_name) const } +const Scalars & Context::getScalars() const +{ + return scalars; +} + + +const Block & Context::getScalar(const String & name) const +{ + auto it = scalars.find(name); + if (scalars.end() == it) + throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::UNKNOWN_SCALAR); + return it->second; +} + + Tables Context::getExternalTables() const { auto lock = getLock(); @@ -959,6 +976,19 @@ void Context::addExternalTable(const String & table_name, const StoragePtr & sto external_tables[table_name] = std::pair(storage, ast); } + +void Context::addScalar(const String & name, const Block & block) +{ + scalars[name] = block; +} + + +bool Context::hasScalar(const String & name) const +{ + return scalars.count(name); +} + + StoragePtr Context::tryRemoveExternalTable(const String & table_name) { TableAndCreateASTs::const_iterator it = external_tables.find(table_name); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index bba4fdb18e9..ff26ec95d85 100644 --- a/dbms/src/Interpreters/Context.h +++ 
b/dbms/src/Interpreters/Context.h @@ -105,6 +105,9 @@ using InputInitializer = std::function; /// Callback for reading blocks of data from client for function input() using InputBlocksReader = std::function; +/// Scalar results of sub queries +using Scalars = std::map; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -144,6 +147,7 @@ private: String default_format; /// Format, used when server formats data by itself and if query does not have FORMAT specification. /// Thus, used in HTTP interface. If not specified - then some globally default format is used. TableAndCreateASTs external_tables; /// Temporary tables. + Scalars scalars; StoragePtr view_source; /// Temporary StorageValues used to generate alias columns for materialized views Tables table_function_results; /// Temporary tables obtained by execution of table functions. Keyed by AST tree id. Context * query_context = nullptr; @@ -264,11 +268,15 @@ public: void assertDatabaseDoesntExist(const String & database_name) const; void checkDatabaseAccessRights(const std::string & database_name) const; + const Scalars & getScalars() const; + const Block & getScalar(const String & name) const; Tables getExternalTables() const; StoragePtr tryGetExternalTable(const String & table_name) const; StoragePtr getTable(const String & database_name, const String & table_name) const; StoragePtr tryGetTable(const String & database_name, const String & table_name) const; void addExternalTable(const String & table_name, const StoragePtr & storage, const ASTPtr & ast = {}); + void addScalar(const String & name, const Block & block); + bool hasScalar(const String & name) const; StoragePtr tryRemoveExternalTable(const String & table_name); StoragePtr executeTableFunction(const ASTPtr & table_expression); diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp 
b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 59f7f46be70..9bf0f0ac18e 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -12,8 +12,11 @@ #include #include +#include #include +#include +#include namespace DB { @@ -53,69 +56,98 @@ void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data) visit(*t, ast, data); } +/// Converting to literal values might take a fair amount of overhead when the value is large, (e.g. +/// Array, BitMap, etc.), This conversion is required for constant folding, index lookup, branch +/// elimination. However, these optimizations should never be related to large values, thus we +/// blacklist them here. +static bool worthConvertingToLiteral(const Block & scalar) +{ + auto scalar_type_name = scalar.safeGetByPosition(0).type->getFamilyName(); + std::set useless_literal_types = {"Array", "Tuple", "AggregateFunction", "Function", "Set", "LowCardinality"}; + return !useless_literal_types.count(scalar_type_name); +} + void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { - Context subquery_context = data.context; - Settings subquery_settings = data.context.getSettings(); - subquery_settings.max_result_rows = 1; - subquery_settings.extremes = 0; - subquery_context.setSettings(subquery_settings); + auto hash = subquery.getTreeHash(); + auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); - ASTPtr subquery_select = subquery.children.at(0); - BlockIO res = InterpreterSelectWithUnionQuery( - subquery_select, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute(); - - Block block; - try + Block scalar; + if (data.context.hasQueryContext() && data.context.getQueryContext().hasScalar(scalar_query_hash_str)) + scalar = data.context.getQueryContext().getScalar(scalar_query_hash_str); + else if 
(data.scalars.count(scalar_query_hash_str)) + scalar = data.scalars[scalar_query_hash_str]; + else { - block = res.in->read(); + Context subquery_context = data.context; + Settings subquery_settings = data.context.getSettings(); + subquery_settings.max_result_rows = 1; + subquery_settings.extremes = 0; + subquery_context.setSettings(subquery_settings); - if (!block) + ASTPtr subquery_select = subquery.children.at(0); + BlockIO res = InterpreterSelectWithUnionQuery( + subquery_select, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute(); + + Block block; + try { - /// Interpret subquery with empty result as Null literal - auto ast_new = std::make_unique(Null()); - ast_new->setAlias(ast->tryGetAlias()); - ast = std::move(ast_new); - return; + block = res.in->read(); + + if (!block) + { + /// Interpret subquery with empty result as Null literal + auto ast_new = std::make_unique(Null()); + ast_new->setAlias(ast->tryGetAlias()); + ast = std::move(ast_new); + return; + } + + if (block.rows() != 1 || res.in->read()) + throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::TOO_MANY_ROWS) + throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + else + throw; } - if (block.rows() != 1 || res.in->read()) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::TOO_MANY_ROWS) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + block = materializeBlock(block); + size_t columns = block.columns(); + + if (columns == 1) + scalar = block; else - throw; + { + + ColumnWithTypeAndName ctn; + ctn.type = std::make_shared(block.getDataTypes()); + ctn.column = 
ColumnTuple::create(block.getColumns()); + scalar.insert(ctn); + } } - size_t columns = block.columns(); - if (columns == 1) + const Settings & settings = data.context.getSettingsRef(); + + // Always convert to literals when there is no query context. + if (!settings.enable_scalar_subquery_optimization || worthConvertingToLiteral(scalar) || !data.context.hasQueryContext()) { - auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); + auto lit = std::make_unique((*scalar.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; - ast = addTypeConversionToAST(std::move(lit), block.safeGetByPosition(0).type->getName()); + ast = addTypeConversionToAST(std::move(lit), scalar.safeGetByPosition(0).type->getName()); } else { - auto tuple = std::make_shared(); - tuple->alias = subquery.alias; - ast = tuple; - tuple->name = "tuple"; - auto exp_list = std::make_shared(); - tuple->arguments = exp_list; - tuple->children.push_back(tuple->arguments); - - exp_list->children.resize(columns); - for (size_t i = 0; i < columns; ++i) - { - exp_list->children[i] = addTypeConversionToAST( - std::make_unique((*block.safeGetByPosition(i).column)[0]), - block.safeGetByPosition(i).type->getName()); - } + auto func = makeASTFunction("__getScalar", std::make_shared(scalar_query_hash_str)); + func->alias = subquery.alias; + func->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; + ast = std::move(func); } + + data.scalars[scalar_query_hash_str] = std::move(scalar); } void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index 85d68a54110..6613cafd495 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -1,6 +1,7 @@ #pragma once #include 
+#include #include namespace DB @@ -36,6 +37,7 @@ public: { const Context & context; size_t subquery_depth; + Scalars & scalars; }; static bool needChildVisit(ASTPtr & node, const ASTPtr &); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 905fe6e3f04..dc7331f7031 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -305,6 +305,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage, NamesAndTypesList()); + + /// Save scalar sub queries's results in the query context + if (context.hasQueryContext()) + for (const auto & it : syntax_analyzer_result->getScalars()) + context.getQueryContext().addScalar(it.first, it.second); + query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NameSet(required_result_column_names.begin(), required_result_column_names.end()), diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 228aea0b2f2..10b5e5483ee 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -220,10 +220,10 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, } /// Replacing scalar subqueries with constant values. 
-void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth) +void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth, Scalars & scalars) { LogAST log; - ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth, scalars}; ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } @@ -871,7 +871,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. - executeScalarSubqueries(query, context, subquery_depth); + executeScalarSubqueries(query, context, subquery_depth, result.scalars); /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 44fdc61ded3..96f5678ac6f 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -14,6 +15,7 @@ class ASTFunction; class AnalyzedJoin; class Context; struct SelectQueryOptions; +using Scalars = std::map; struct SyntaxAnalyzerResult { @@ -43,8 +45,12 @@ struct SyntaxAnalyzerResult /// Predicate optimizer overrides the sub queries bool rewrite_subqueries = false; + /// Results of scalar sub queries + Scalars scalars; + void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns); Names requiredSourceColumns() const { return required_source_columns.getNames(); } + const Scalars & getScalars() const { return scalars; } }; using SyntaxAnalyzerResultPtr = std::shared_ptr; diff --git a/dbms/src/Storages/StorageDistributed.cpp 
b/dbms/src/Storages/StorageDistributed.cpp index 2c289dd714e..36c54d3e460 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -323,11 +323,13 @@ BlockInputStreams StorageDistributed::read( Block header = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage)).getSampleBlock(); + const Scalars & scalars = context.hasQueryContext() ? context.getQueryContext().getScalars() : Scalars{}; + ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr ? ClusterProxy::SelectStreamFactory( - header, processed_stage, remote_table_function_ptr, context.getExternalTables()) + header, processed_stage, remote_table_function_ptr, scalars, context.getExternalTables()) : ClusterProxy::SelectStreamFactory( - header, processed_stage, QualifiedTableName{remote_database, remote_table}, context.getExternalTables()); + header, processed_stage, QualifiedTableName{remote_database, remote_table}, scalars, context.getExternalTables()); if (settings.optimize_skip_unused_shards) { From 20093fa065862af691e8a9df42df544a493d9d1e Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 23 Oct 2019 16:59:03 +0300 Subject: [PATCH 094/122] extract more logic out of QueryNormalizer --- dbms/src/Interpreters/ActionsVisitor.cpp | 2 +- .../Interpreters/CrossToInnerJoinVisitor.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- .../MarkTableIdentifiersVisitor.cpp | 47 +++++++++++++++++++ .../MarkTableIdentifiersVisitor.h | 33 +++++++++++++ .../PredicateExpressionsOptimizer.cpp | 4 ++ dbms/src/Interpreters/QueryNormalizer.cpp | 43 +---------------- dbms/src/Interpreters/QueryNormalizer.h | 17 +------ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 32 +++++++++++++ dbms/src/Interpreters/misc.h | 16 +++++++ dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- 
.../MergeTree/MergeTreeWhereOptimizer.cpp | 2 +- 15 files changed, 142 insertions(+), 66 deletions(-) create mode 100644 dbms/src/Interpreters/MarkTableIdentifiersVisitor.cpp create mode 100644 dbms/src/Interpreters/MarkTableIdentifiersVisitor.h create mode 100644 dbms/src/Interpreters/misc.h diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index c587d1826e1..3b3f1ddde63 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index 94b38b2c991..21f6610aa82 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -4,7 +4,7 @@ #include #include #include -#include // for functionIsInOperator +#include #include #include #include diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 59f7f46be70..e2666993da8 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 14849763ef3..f694f74989a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/Interpreters/MarkTableIdentifiersVisitor.cpp b/dbms/src/Interpreters/MarkTableIdentifiersVisitor.cpp new file mode 100644 index 00000000000..f110e0ba2df --- /dev/null +++ b/dbms/src/Interpreters/MarkTableIdentifiersVisitor.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include 
+#include +#include + +namespace DB +{ + +bool MarkTableIdentifiersMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) +{ + if (child->as()) + return false; + if (node->as()) + return false; + return true; +} + +void MarkTableIdentifiersMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * node_func = ast->as()) + visit(*node_func, ast, data); + else if (auto * node_table = ast->as()) + visit(*node_table, ast, data); +} + +void MarkTableIdentifiersMatcher::visit(ASTTableExpression & table, ASTPtr &, Data &) +{ + if (table.database_and_table_name) + setIdentifierSpecial(table.database_and_table_name); +} + +void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data & data) +{ + /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. + if (functionIsInOrGlobalInOperator(func.name)) + { + auto & ast = func.arguments->children.at(1); + if (auto opt_name = tryGetIdentifierName(ast)) + if (!data.aliases.count(*opt_name)) + setIdentifierSpecial(ast); + } +} + +} diff --git a/dbms/src/Interpreters/MarkTableIdentifiersVisitor.h b/dbms/src/Interpreters/MarkTableIdentifiersVisitor.h new file mode 100644 index 00000000000..f882f322bcf --- /dev/null +++ b/dbms/src/Interpreters/MarkTableIdentifiersVisitor.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ASTFunction; +struct ASTTableExpression; + +class MarkTableIdentifiersMatcher +{ +public: + using Visitor = InDepthNodeVisitor; + + struct Data + { + const Aliases & aliases; + }; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); + static void visit(ASTPtr & ast, Data & data); + +private: + static void visit(ASTTableExpression & table, ASTPtr &, Data &); + static void visit(const ASTFunction & func, ASTPtr &, Data &); +}; + +using MarkTableIdentifiersVisitor = MarkTableIdentifiersMatcher::Visitor; + +} diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp 
b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 2a307c6ed7f..27772b8fc94 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -412,6 +413,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); + MarkTableIdentifiersVisitor::Data mark_tables_data{aliases}; + MarkTableIdentifiersVisitor(mark_tables_data).visit(ast); + QueryNormalizer::Data normalizer_data(aliases, settings); QueryNormalizer(normalizer_data).visit(ast); diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index e109e4a63fd..9d6d28a68f6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include namespace DB @@ -63,34 +62,6 @@ private: }; -void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) -{ - auto & aliases = data.aliases; - String & func_name = node.name; - ASTPtr & func_arguments = node.arguments; - - /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. - if (functionIsInOrGlobalInOperator(func_name)) - { - auto & ast = func_arguments->children.at(1); - if (auto opt_name = tryGetIdentifierName(ast)) - if (!aliases.count(*opt_name)) - setIdentifierSpecial(ast); - } - - /// Special cases for count function. - String func_name_lowercase = Poco::toLower(func_name); - if (startsWith(func_name_lowercase, "count")) - { - /// Select implementation of countDistinct based on settings. - /// Important that it is done as query rewrite. 
It means rewritten query - /// will be sent to remote servers during distributed query execution, - /// and on all remote servers, function implementation will be same. - if (endsWith(func_name, "Distinct") && func_name_lowercase == "countdistinct") - func_name = data.settings.count_distinct_implementation; - } -} - void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) { auto & current_asts = data.current_asts; @@ -144,16 +115,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -/// mark table identifiers as 'not columns' void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { - /// mark table Identifiers as 'not a column' - if (node.table_expression) - { - auto & expr = node.table_expression->as(); - setIdentifierSpecial(expr.database_and_table_name); - } - /// normalize JOIN ON section if (node.table_join) { @@ -177,7 +140,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data if (needVisitChild(child)) visit(child, data); -#if 1 /// TODO: legacy? /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, /// but also in where_expression and having_expression. if (select.prewhere()) @@ -186,7 +148,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data visit(select.refWhere(), data); if (select.having()) visit(select.refHaving(), data); -#endif } /// Don't go into subqueries. 
@@ -243,9 +204,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) data.current_alias = my_alias; } - if (auto * node_func = ast->as()) - visit(*node_func, ast, data); - else if (auto * node_id = ast->as()) + if (auto * node_id = ast->as()) visit(*node_id, ast, data); else if (auto * node_tables = ast->as()) visit(*node_tables, ast, data); diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 6d6fea86e44..b842ae3f018 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -2,25 +2,13 @@ #include -#include #include #include namespace DB { -inline bool functionIsInOperator(const String & name) -{ - return name == "in" || name == "notIn"; -} - -inline bool functionIsInOrGlobalInOperator(const String & name) -{ - return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn"; -} - class ASTSelectQuery; -class ASTFunction; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; @@ -33,13 +21,11 @@ class QueryNormalizer { const UInt64 max_ast_depth; const UInt64 max_expanded_ast_elements; - const String count_distinct_implementation; template ExtractedSettings(const T & settings) : max_ast_depth(settings.max_ast_depth), - max_expanded_ast_elements(settings.max_expanded_ast_elements), - count_distinct_implementation(settings.count_distinct_implementation) + max_expanded_ast_elements(settings.max_expanded_ast_elements) {} }; @@ -80,7 +66,6 @@ private: static void visit(ASTPtr & query, Data & data); static void visit(ASTIdentifier &, ASTPtr &, Data &); - static void visit(ASTFunction &, const ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 228aea0b2f2..1f7a7abaec3 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ 
b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,26 @@ namespace using LogAST = DebugASTLog; /// set to true to enable logs +/// Select implementation of countDistinct based on settings. +/// Important that it is done as query rewrite. It means rewritten query +/// will be sent to remote servers during distributed query execution, +/// and on all remote servers, function implementation will be same. +struct CustomizeFunctionsData +{ + using TypeToVisit = ASTFunction; + + const String & count_distinct; + + void visit(ASTFunction & func, ASTPtr &) + { + if (Poco::toLower(func.name) == "countdistinct") + func.name = count_distinct; + } +}; + +using CustomizeFunctionsMatcher = OneTypeMatcher; +using CustomizeFunctionsVisitor = InDepthNodeVisitor; + /// Add columns from storage to source_columns list. void collectSourceColumns(const ColumnsDescription & columns, NamesAndTypesList & source_columns, bool add_virtuals) @@ -850,6 +871,11 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); } + { + CustomizeFunctionsVisitor::Data data{settings.count_distinct_implementation}; + CustomizeFunctionsVisitor(data).visit(query); + } + /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; @@ -857,6 +883,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } + /// Mark table ASTIdentifiers with not a column marker + { + MarkTableIdentifiersVisitor::Data data{result.aliases}; + MarkTableIdentifiersVisitor(data).visit(query); + } + /// Common subexpression elimination. Rewrite rules. 
{ QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef()); diff --git a/dbms/src/Interpreters/misc.h b/dbms/src/Interpreters/misc.h new file mode 100644 index 00000000000..d5e2894bb4c --- /dev/null +++ b/dbms/src/Interpreters/misc.h @@ -0,0 +1,16 @@ +#pragma once + +namespace DB +{ + +inline bool functionIsInOperator(const std::string & name) +{ + return name == "in" || name == "notIn"; +} + +inline bool functionIsInOrGlobalInOperator(const std::string & name) +{ + return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn"; +} + +} diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index b3e4c776605..a2789fe3063 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 856354959f9..147071fc493 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 264c91cd890..da3f1df8130 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index a772e0a204b..dcca5baf311 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -9,7 
+9,7 @@ #include #include #include -#include +#include #include #include #include From fa05a5860fbcf3aa3c1bc81f10dca688f78ee38c Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 23 Oct 2019 17:41:17 +0300 Subject: [PATCH 095/122] Update Dockerfile for binary packager (#7456) --- docker/packager/binary/Dockerfile | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index d88a2767efd..9a489f59d63 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -57,8 +57,6 @@ RUN apt-get update -y \ rename \ wget -# Build and install tools for cross-linking to Darwin - ENV CC=clang-8 ENV CXX=clang++-8 @@ -66,11 +64,19 @@ ENV CXX=clang++-8 RUN git clone https://github.com/tpoechtrager/apple-libtapi.git RUN cd apple-libtapi && INSTALLPREFIX=/cctools ./build.sh && ./install.sh +# Build and install tools for cross-linking to Darwin RUN git clone https://github.com/tpoechtrager/cctools-port.git RUN cd cctools-port/cctools && ./configure --prefix=/cctools --with-libtapi=/cctools --target=x86_64-apple-darwin && make install +# Download toolchain for Darwin +RUN mkdir -p /build/cmake/toolchain/darwin-x86_64 RUN wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz -RUN tar xJf MacOSX10.14.sdk.tar.xz -C /cctools +RUN tar --strip-components=1 xJf MacOSX10.14.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 + +# Download toolchain for ARM +RUN mkdir -p /build/cmake/toolchain/linux-aarch64 +RUN wget https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz +RUN tar --strip-components=1 xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C /build/cmake/toolchain/linux-aarch64 COPY build.sh / CMD 
["/bin/bash", "/build.sh"] From 6dc497f7bc51e1cbbcf02b4a5b76fd59405c4590 Mon Sep 17 00:00:00 2001 From: Koblikov Mihail Date: Wed, 23 Oct 2019 19:32:42 +0400 Subject: [PATCH 096/122] fix typo in ontime.md (#7285) * fix type in ontime.md * Update docs/en/getting_started/example_datasets/ontime.md Co-Authored-By: Ivan Blinkov * Update docs/en/getting_started/example_datasets/ontime.md Co-Authored-By: Ivan Blinkov * Update docs/en/getting_started/example_datasets/ontime.md Co-Authored-By: Ivan Blinkov * Update ontime.md --- docs/en/getting_started/example_datasets/ontime.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/en/getting_started/example_datasets/ontime.md b/docs/en/getting_started/example_datasets/ontime.md index 5df0bd1fe5e..335532f1da9 100644 --- a/docs/en/getting_started/example_datasets/ontime.md +++ b/docs/en/getting_started/example_datasets/ontime.md @@ -321,12 +321,11 @@ ORDER BY Year; Q8. The most popular destinations by the number of directly connected cities for various year ranges ```sql -SELECT DestCityName, uniqExact(OriginCityName) AS u F -ROM ontime -WHERE Year>=2000 and Year<=2010 +SELECT DestCityName, uniqExact(OriginCityName) AS u +FROM ontime +WHERE Year >= 2000 and Year <= 2010 GROUP BY DestCityName -ORDER BY u DESC -LIMIT 10; +ORDER BY u DESC LIMIT 10; ``` Q9. From 32ca372b9d7f06d880ebfcf1a6d560d411527019 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 23 Oct 2019 18:54:18 +0300 Subject: [PATCH 097/122] Revert "Update Dockerfile for binary packager (#7456)" (#7458) This reverts commit fa05a5860fbcf3aa3c1bc81f10dca688f78ee38c. 
--- docker/packager/binary/Dockerfile | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 9a489f59d63..d88a2767efd 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -57,6 +57,8 @@ RUN apt-get update -y \ rename \ wget +# Build and install tools for cross-linking to Darwin + ENV CC=clang-8 ENV CXX=clang++-8 @@ -64,19 +66,11 @@ ENV CXX=clang++-8 RUN git clone https://github.com/tpoechtrager/apple-libtapi.git RUN cd apple-libtapi && INSTALLPREFIX=/cctools ./build.sh && ./install.sh -# Build and install tools for cross-linking to Darwin RUN git clone https://github.com/tpoechtrager/cctools-port.git RUN cd cctools-port/cctools && ./configure --prefix=/cctools --with-libtapi=/cctools --target=x86_64-apple-darwin && make install -# Download toolchain for Darwin -RUN mkdir -p /build/cmake/toolchain/darwin-x86_64 RUN wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz -RUN tar --strip-components=1 xJf MacOSX10.14.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 - -# Download toolchain for ARM -RUN mkdir -p /build/cmake/toolchain/linux-aarch64 -RUN wget https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -RUN tar --strip-components=1 xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C /build/cmake/toolchain/linux-aarch64 +RUN tar xJf MacOSX10.14.sdk.tar.xz -C /cctools COPY build.sh / CMD ["/bin/bash", "/build.sh"] From d1a19d26e85fce6d50fa56a42dff4b50604ca343 Mon Sep 17 00:00:00 2001 From: Konstantin Podshumok Date: Wed, 23 Oct 2019 20:33:40 +0300 Subject: [PATCH 098/122] Remove hardcoded paths in unwind target In most cases they match defaults now, but it is too hard to override when one needs to 
(alternative builds) --- contrib/libunwind-cmake/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index f09d0979692..7901a990b85 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -30,9 +30,4 @@ target_include_directories(unwind SYSTEM BEFORE PUBLIC $ Date: Fri, 18 Oct 2019 00:33:26 +0300 Subject: [PATCH 099/122] Fix INSERT into Distributed non local node with MATERIALIZED columns Previous patch e527def18a1bbe5fba0920b7747e9c556fd21ff5 ("Fix INSERT into Distributed() table with MATERIALIZED column") fixes it only for cases when the node is local, i.e. direct insert. This patch address the problem when the node is not local (`is_local == false`), by erasing materialized columns on INSERT into Distributed. And this patch fixes two cases, depends on `insert_distributed_sync` setting: - `insert_distributed_sync=0` ``` Not found column value in block. There are only columns: date. Stack trace: 2. 0x7ffff7be92e0 DB::Exception::Exception() dbms/src/Common/Exception.h:27 3. 0x7fffec5d6cf6 DB::Block::getByName(...) dbms/src/Core/Block.cpp:187 4. 0x7fffec2fe067 DB::NativeBlockInputStream::readImpl() dbms/src/DataStreams/NativeBlockInputStream.cpp:159 5. 0x7fffec2d223f DB::IBlockInputStream::read() dbms/src/DataStreams/IBlockInputStream.cpp:61 6. 0x7ffff7c6d40d DB::TCPHandler::receiveData() dbms/programs/server/TCPHandler.cpp:971 7. 0x7ffff7c6cc1d DB::TCPHandler::receivePacket() dbms/programs/server/TCPHandler.cpp:855 8. 0x7ffff7c6a1ef DB::TCPHandler::readDataNext(unsigned long const&, int const&) dbms/programs/server/TCPHandler.cpp:406 9. 0x7ffff7c6a41b DB::TCPHandler::readData(DB::Settings const&) dbms/programs/server/TCPHandler.cpp:437 10. 0x7ffff7c6a5d9 DB::TCPHandler::processInsertQuery(DB::Settings const&) dbms/programs/server/TCPHandler.cpp:464 11. 
0x7ffff7c687b5 DB::TCPHandler::runImpl() dbms/programs/server/TCPHandler.cpp:257 ``` - `insert_distributed_sync=1` ``` 2019.10.18 13:23:22.114578 [ 44 ] {a78f669f-0b08-4337-abf8-d31e958f6d12} executeQuery: Code: 171, e.displayText() = DB::Exception: Block structure mismatch in RemoteBlockOutputStream stream: different number of columns: date Date UInt16(size = 1), value Date UInt16(size = 1) date Date UInt16(size = 0): Insertion status: Wrote 1 blocks and 0 rows on shard 0 replica 0, 127.0.0.1:59000 (average 0 ms per block) Wrote 0 blocks and 0 rows on shard 1 replica 0, 127.0.0.2:59000 (average 2 ms per block) (version 19.16.1.1) (from [::1]:3624) (in query: INSERT INTO distributed_00952 VALUES ), Stack trace: 2. 0x7ffff7be92e0 DB::Exception::Exception() dbms/src/Common/Exception.h:27 3. 0x7fffec5da4e9 DB::checkBlockStructure(...)::{...}::operator()(...) const dbms/src/Core/Block.cpp:460 4. 0x7fffec5da671 void DB::checkBlockStructure(...) dbms/src/Core/Block.cpp:467 5. 0x7fffec5d8d58 DB::assertBlocksHaveEqualStructure(...) dbms/src/Core/Block.cpp:515 6. 0x7fffec326630 DB::RemoteBlockOutputStream::write(DB::Block const&) dbms/src/DataStreams/RemoteBlockOutputStream.cpp:68 7. 
0x7fffe98bd154 DB::DistributedBlockOutputStream::runWritingJob(DB::DistributedBlockOutputStream::JobReplica&, DB::Block const&)::{lambda()#1}::operator()() const dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp:280 ```` Fixes: #7365 Fixes: #5429 Refs: #6891 --- .../DistributedBlockOutputStream.cpp | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 5dce68ec381..f295b6d4d3d 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -81,12 +81,25 @@ void DistributedBlockOutputStream::writePrefix() void DistributedBlockOutputStream::write(const Block & block) { - if (insert_sync) - writeSync(block); - else - writeAsync(block); -} + Block ordinary_block{ block }; + /* They are added by the AddingDefaultBlockOutputStream, and we will get + * different number of columns eventually */ + for (const auto & col : storage.getColumns().getMaterialized()) + if (ordinary_block.has(col.name)) + { + ordinary_block.erase(col.name); + LOG_DEBUG(log, storage.getTableName() + << ": column " + col.name + " will be removed, " + << "because it is MATERIALIZED"); + } + + + if (insert_sync) + writeSync(ordinary_block); + else + writeAsync(ordinary_block); +} void DistributedBlockOutputStream::writeAsync(const Block & block) { From 80cf86f100bfaa165d252ef875558e4f130cd170 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Oct 2019 10:22:43 +0300 Subject: [PATCH 100/122] Cover INSERT into Distributed with MATERIALIZED columns and !is_local node I guess that adding new cluster into server-test.xml is not required, but it won't harm. 
--- dbms/programs/server/config.xml | 16 ++++++++- ...ributed_with_materialized_column.reference | 11 +++++++ ...o_distributed_with_materialized_column.sql | 33 +++++++++++++++++-- dbms/tests/server-test.xml | 14 ++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index c8d33922167..6e9bb527c97 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -180,7 +180,21 @@ 9000 - + + + + + 127.0.0.1 + 9000 + + + + + 127.0.0.2 + 9000 + + + diff --git a/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.reference b/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.reference index b01acf34583..11b42f40c7a 100644 --- a/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.reference +++ b/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.reference @@ -1,3 +1,14 @@ +insert_distributed_sync=0 2018-08-01 2018-08-01 2018-08-01 2017-08-01 +2018-08-01 2017-08-01 +2018-08-01 +2018-08-01 2017-08-01 +insert_distributed_sync=1 +2018-08-01 +2018-08-01 +2018-08-01 2017-08-01 +2018-08-01 2017-08-01 +2018-08-01 +2018-08-01 2017-08-01 diff --git a/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.sql b/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.sql index 9e5bc3cbdf9..6b70d927204 100644 --- a/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.sql +++ b/dbms/tests/queries/0_stateless/00952_insert_into_distributed_with_materialized_column.sql @@ -1,15 +1,42 @@ DROP TABLE IF EXISTS local_00952; DROP TABLE IF EXISTS distributed_00952; -CREATE TABLE local_00952 (date Date, value Date MATERIALIZED toDate('2017-08-01')) ENGINE = MergeTree(date, date, 8192); -CREATE TABLE distributed_00952 AS local_00952 ENGINE = 
Distributed('test_shard_localhost', currentDatabase(), local_00952, rand()); +-- +-- insert_distributed_sync=0 +-- +SELECT 'insert_distributed_sync=0'; +SET insert_distributed_sync=0; -SET insert_distributed_sync=1; +CREATE TABLE local_00952 (date Date, value Date MATERIALIZED toDate('2017-08-01')) ENGINE = MergeTree(date, date, 8192); +CREATE TABLE distributed_00952 AS local_00952 ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), local_00952, rand()); INSERT INTO distributed_00952 VALUES ('2018-08-01'); +SYSTEM FLUSH DISTRIBUTED distributed_00952; + SELECT * FROM distributed_00952; +SELECT date, value FROM distributed_00952; SELECT * FROM local_00952; SELECT date, value FROM local_00952; DROP TABLE distributed_00952; DROP TABLE local_00952; + +-- +-- insert_distributed_sync=1 +-- +SELECT 'insert_distributed_sync=1'; +SET insert_distributed_sync=1; + +CREATE TABLE local_00952 (date Date, value Date MATERIALIZED toDate('2017-08-01')) ENGINE = MergeTree(date, date, 8192); +CREATE TABLE distributed_00952 AS local_00952 ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), local_00952, rand()); + +INSERT INTO distributed_00952 VALUES ('2018-08-01'); + +SELECT * FROM distributed_00952; +SELECT date, value FROM distributed_00952; +SELECT * FROM local_00952; +SELECT date, value FROM local_00952; + +DROP TABLE distributed_00952; +DROP TABLE local_00952; + diff --git a/dbms/tests/server-test.xml b/dbms/tests/server-test.xml index d68cbca53c1..d9e547b4d55 100644 --- a/dbms/tests/server-test.xml +++ b/dbms/tests/server-test.xml @@ -75,6 +75,20 @@ + + + + 127.0.0.1 + 59000 + + + + + 127.0.0.2 + 59000 + + + From f8a401bbf72e487d1a2953f8dfdf4b180acf54eb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 24 Oct 2019 08:56:53 +0800 Subject: [PATCH 101/122] Labeler seems to require additional permissions from PR authors (#7466) * Delete labeler.yml * Delete labeler.keywords.yml * Delete labeler.yml --- .github/labeler.keywords.yml | 1 - 
.github/labeler.yml | 19 ------------------- .github/workflows/labeler.yml | 11 ----------- 3 files changed, 31 deletions(-) delete mode 100644 .github/labeler.keywords.yml delete mode 100644 .github/labeler.yml delete mode 100644 .github/workflows/labeler.yml diff --git a/.github/labeler.keywords.yml b/.github/labeler.keywords.yml deleted file mode 100644 index c70ea45de53..00000000000 --- a/.github/labeler.keywords.yml +++ /dev/null @@ -1 +0,0 @@ -pr-feature: "New Feature" diff --git a/.github/labeler.yml b/.github/labeler.yml deleted file mode 100644 index 72cf714f039..00000000000 --- a/.github/labeler.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Documentation PRs -documentation: - - "**/*.md" - - "docs/**/*" -pr-documentation: - - "**/*.md" - - "docs/**/*" - -# Component labels -comp-mutations: - - "**/*Mutation*" -comp-matview: - - "**/*MaterializedView*" -comp-skipidx: - - "**/*Indices*" -comp-kafka: - - "dbms/src/Storages/Kafka/**/*" - - "dbms/tests/integration/test_storage_kafka/**/*" - - "utils/kafka/**/*" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 0110ef7b516..00000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: "Pull Request Labeler" -on: - pull_request - -jobs: - by-filename: - runs-on: ubuntu-latest - steps: - - uses: "actions/labeler@v2" - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" From e8e5cefc354de553f3ff7fd0485778aeab82aa95 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 24 Oct 2019 08:58:06 +0300 Subject: [PATCH 102/122] Fixed integration test for #7414. 
--- .../integration/test_multiple_disks/test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index d0e2cb6ef19..9c69a99ef1b 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -129,6 +129,38 @@ def test_system_tables(start_cluster): "max_data_part_size": "20971520", "move_factor": 0.1, }, + { + "policy_name": "special_warning_policy", + "volume_name": "special_warning_zero_volume", + "volume_priority": "1", + "disks": ["default"], + "max_data_part_size": "0", + "move_factor": 0.1, + }, + { + "policy_name": "special_warning_policy", + "volume_name": "special_warning_default_volume", + "volume_priority": "2", + "disks": ["external"], + "max_data_part_size": "0", + "move_factor": 0.1, + }, + { + "policy_name": "special_warning_policy", + "volume_name": "special_warning_small_volume", + "volume_priority": "3", + "disks": ["jbod1"], + "max_data_part_size": "1024", + "move_factor": 0.1, + }, + { + "policy_name": "special_warning_policy", + "volume_name": "special_warning_big_volume", + "volume_priority": "4", + "disks": ["jbod2"], + "max_data_part_size": "1024000000", + "move_factor": 0.1, + }, ] clickhouse_policies_data = json.loads(node1.query("SELECT * FROM system.storage_policies WHERE policy_name != 'default' FORMAT JSON"))["data"] From 3d2eab75353960b7ed0c03e037540ca5d4a05e0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 24 Oct 2019 09:49:58 +0200 Subject: [PATCH 103/122] Add PARTITION ID to OPTIMIZE documentation --- docs/en/query_language/misc.md | 4 ++-- docs/ru/query_language/misc.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index 22d67044619..9bcac5cdbfd 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -174,7 +174,7 @@ Changes already made by the mutation are not rolled back. ## OPTIMIZE {#misc_operations-optimize} ```sql -OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] +OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] ``` This query tries to initialize an unscheduled merge of data parts for tables with a table engine from the [MergeTree](../operations/table_engines/mergetree.md) family. Other kinds of table engines aren't supported. @@ -182,7 +182,7 @@ This query tries to initialize an unscheduled merge of data parts for tables wit When `OPTIMIZE` is used with the [ReplicatedMergeTree](../operations/table_engines/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled). - If `OPTIMIZE` doesn't perform a merge for any reason, it doesn't notify the client. To enable notifications, use the [optimize_throw_if_noop](../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. -- If you specify a `PARTITION`, only the specified partition is optimized. +- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter.md#alter-how-to-specify-part-expr). - If you specify `FINAL`, optimization is performed even when all the data is already in one part. !!! 
warning "Warning" diff --git a/docs/ru/query_language/misc.md b/docs/ru/query_language/misc.md index 00cb0e7fd93..ce73a5aafdb 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/query_language/misc.md @@ -173,7 +173,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = ' ## OPTIMIZE {#misc_operations-optimize} ```sql -OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] +OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] ``` Запрос пытается запустить внеплановый мёрж кусков данных для таблиц семейства [MergeTree](../operations/table_engines/mergetree.md). Другие движки таблиц не поддерживаются. @@ -181,7 +181,7 @@ OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] Если `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../operations/table_engines/replication.md), ClickHouse создаёт задачу на мёрж и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`). - Если `OPTIMIZE` не выполняет мёрж по любой причине, ClickHouse не оповещает об этом клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../operations/settings/settings.md#setting-optimize_throw_if_noop). -- Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. +- Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter.md#alter-how-to-specify-part-expr). - Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске. !!! warning "Внимание" From 3debdc211904eb0b5fe6cca1a4af99d8db635892 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 24 Oct 2019 11:52:33 +0300 Subject: [PATCH 104/122] Added integration tests for ALTER MOVE PARTITION and fixed minor things. 
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 54 +++++------- .../integration/test_multiple_disks/test.py | 87 ++++++++++++++++++- 2 files changed, 109 insertions(+), 32 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f530c537671..8cee32cb004 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2723,19 +2723,18 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & if (!disk) throw Exception("Disk " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK); - parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { - return part_ptr->disk->getName() == disk->getName(); - }), parts.end()); + parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) + { + return part_ptr->disk->getName() == disk->getName(); + }), parts.end()); - if (!parts.empty()) + if (parts.empty()) { - if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) - throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); - } - else - { - LOG_DEBUG(log, "No parts of partition " << partition_id << " to move to disk " << disk->getName()); + throw Exception("All parts of partition " + partition_id + " are already on disk '" + disk->getName() + "'", ErrorCodes::UNKNOWN_DISK); } + + if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) + throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); } @@ -2763,32 +2762,25 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String if (!volume) throw Exception("Volume " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK); - for (const auto & part : parts) - for (const auto & disk : volume->disks) - if (part->disk->getName() == disk->getName()) - throw Exception("Part " 
+ part->name + " already on volume '" + name + "'", ErrorCodes::UNKNOWN_DISK); - - parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { - for (const auto & disk : volume->disks) + parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) { - if (part_ptr->disk->getName() == disk->getName()) + for (const auto & disk : volume->disks) { - return true; + if (part_ptr->disk->getName() == disk->getName()) + { + return true; + } } - } - return false; - }), parts.end()); + return false; + }), parts.end()); + if (parts.empty()) + { + throw Exception("All parts of partition " + partition_id + " are already on volume '" + volume->getName() + "'", ErrorCodes::UNKNOWN_DISK); + } - if (!parts.empty()) - { - if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) - throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); - } - else - { - LOG_DEBUG(log, "No parts of partition " << partition_id << " to move to volume " << volume->getName()); - } + if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) + throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED); } diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index 4ee337229c9..ed8ad699472 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -462,7 +462,7 @@ def test_alter_move(start_cluster, name, engine): node1.query("INSERT INTO {} VALUES(toDate('2019-04-10'), 42)".format(name)) node1.query("INSERT INTO {} VALUES(toDate('2019-04-11'), 43)".format(name)) used_disks = get_used_disks_for_table(node1, name) - assert all(d.startswith("jbod") for d in used_disks), "All writes shoud go to jbods" + assert all(d.startswith("jbod") for d in used_disks), "All writes should go to jbods" first_part = node1.query("SELECT name FROM system.parts WHERE table = '{}' and active = 
1 ORDER BY modification_time LIMIT 1".format(name)).strip() @@ -498,6 +498,91 @@ def test_alter_move(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + +@pytest.mark.parametrize("volume_or_disk", [ + "DISK", + "VOLUME" +]) +def test_alter_move_half_of_partition(start_cluster, volume_or_disk): + name = "alter_move_half_of_partition" + engine = "MergeTree()" + try: + node1.query(""" + CREATE TABLE {name} ( + EventDate Date, + number UInt64 + ) ENGINE = {engine} + ORDER BY tuple() + PARTITION BY toYYYYMM(EventDate) + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + node1.query("SYSTEM STOP MERGES {}".format(name)) + + node1.query("INSERT INTO {} VALUES(toDate('2019-03-15'), 65)".format(name)) + node1.query("INSERT INTO {} VALUES(toDate('2019-03-16'), 42)".format(name)) + used_disks = get_used_disks_for_table(node1, name) + assert all(d.startswith("jbod") for d in used_disks), "All writes should go to jbods" + + time.sleep(1) + parts = node1.query("SELECT name FROM system.parts WHERE table = '{}' and active = 1".format(name)).splitlines() + assert len(parts) == 2 + + node1.query("ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format(name, parts[0])) + disks = node1.query("SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format(name, parts[0])).splitlines() + assert disks == ["external"] + + time.sleep(1) + node1.query("ALTER TABLE {} MOVE PARTITION 201903 TO {volume_or_disk} 'external'".format(name, volume_or_disk=volume_or_disk)) + disks = node1.query("SELECT disk_name FROM system.parts WHERE table = '{}' and partition = '201903' and active = 1".format(name)).splitlines() + assert disks == ["external"]*2 + + assert node1.query("SELECT COUNT() FROM {}".format(name)) == "2\n" + + finally: + node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + + +@pytest.mark.parametrize("volume_or_disk", [ + "DISK", + "VOLUME" +]) +def 
test_alter_double_move_partition(start_cluster, volume_or_disk): + name = "alter_double_move_partition" + engine = "MergeTree()" + try: + node1.query(""" + CREATE TABLE {name} ( + EventDate Date, + number UInt64 + ) ENGINE = {engine} + ORDER BY tuple() + PARTITION BY toYYYYMM(EventDate) + SETTINGS storage_policy='jbods_with_external' + """.format(name=name, engine=engine)) + + node1.query("SYSTEM STOP MERGES {}".format(name)) + + node1.query("INSERT INTO {} VALUES(toDate('2019-03-15'), 65)".format(name)) + node1.query("INSERT INTO {} VALUES(toDate('2019-03-16'), 42)".format(name)) + used_disks = get_used_disks_for_table(node1, name) + assert all(d.startswith("jbod") for d in used_disks), "All writes should go to jbods" + + time.sleep(1) + node1.query("ALTER TABLE {} MOVE PARTITION 201903 TO {volume_or_disk} 'external'".format(name, volume_or_disk=volume_or_disk)) + disks = node1.query("SELECT disk_name FROM system.parts WHERE table = '{}' and partition = '201903' and active = 1".format(name)).splitlines() + assert disks == ["external"]*2 + + assert node1.query("SELECT COUNT() FROM {}".format(name)) == "2\n" + + time.sleep(1) + with pytest.raises(QueryRuntimeException): + node1.query("ALTER TABLE {} MOVE PARTITION 201903 TO {volume_or_disk} 'external'".format(name, volume_or_disk=volume_or_disk)) + + finally: + node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + + def produce_alter_move(node, name): move_type = random.choice(["PART", "PARTITION"]) if move_type == "PART": From 255da8f5e03d0afcd16abc541ed83b715c367e19 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 24 Oct 2019 12:11:06 +0300 Subject: [PATCH 105/122] Fixed style. 
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 8cee32cb004..b08717e60e9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2730,7 +2730,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & if (parts.empty()) { - throw Exception("All parts of partition " + partition_id + " are already on disk '" + disk->getName() + "'", ErrorCodes::UNKNOWN_DISK); + throw Exception("All parts of partition '" + partition_id + "' are already on disk '" + disk->getName() + "'", ErrorCodes::UNKNOWN_DISK); } if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) @@ -2776,7 +2776,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String if (parts.empty()) { - throw Exception("All parts of partition " + partition_id + " are already on volume '" + volume->getName() + "'", ErrorCodes::UNKNOWN_DISK); + throw Exception("All parts of partition '" + partition_id + "' are already on volume '" + volume->getName() + "'", ErrorCodes::UNKNOWN_DISK); } if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) From 7edd80c9b7880ac07531a6874f6d2e78b7a5be67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 24 Oct 2019 12:25:28 +0300 Subject: [PATCH 106/122] Add test for existing dictionary --- dbms/src/Databases/DatabasesCommon.cpp | 12 +++++++--- .../integration/test_dictionaries_ddl/test.py | 23 ++++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 1f1a1a7a004..2feda6fc2b3 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -186,14 +186,20 @@ void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const Sto void 
DatabaseWithOwnTablesBase::attachDictionary(const String & dictionary_name, const Context & context, bool load) { + const auto & external_loader = context.getExternalDictionariesLoader(); + + String full_name = getDatabaseName() + "." + dictionary_name; { std::lock_guard lock(mutex); - if (!dictionaries.emplace(dictionary_name).second) - throw Exception("Dictionary " + name + "." + dictionary_name + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); + auto status = external_loader.getCurrentStatus(full_name); + if (status != ExternalLoader::Status::NOT_EXIST || !dictionaries.emplace(dictionary_name).second) + throw Exception( + "Dictionary " + full_name + " already exists.", + ErrorCodes::DICTIONARY_ALREADY_EXISTS); } if (load) - context.getExternalDictionariesLoader().reload(getDatabaseName() + "." + dictionary_name, true); + external_loader.reload(full_name, true); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/dbms/tests/integration/test_dictionaries_ddl/test.py b/dbms/tests/integration/test_dictionaries_ddl/test.py index 6687bed215c..a949bee136a 100644 --- a/dbms/tests/integration/test_dictionaries_ddl/test.py +++ b/dbms/tests/integration/test_dictionaries_ddl/test.py @@ -12,6 +12,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) node1 = cluster.add_instance('node1', with_mysql=True, main_configs=['configs/dictionaries/simple_dictionary.xml']) node2 = cluster.add_instance('node2', with_mysql=True, main_configs=['configs/dictionaries/simple_dictionary.xml', 'configs/dictionaries/lazy_load.xml']) +node3 = cluster.add_instance('node3', main_configs=['configs/dictionaries/dictionary_with_conflict_name.xml']) def create_mysql_conn(user, password, hostname, port): @@ -33,7 +34,7 @@ def execute_mysql_query(connection, query): def started_cluster(): try: cluster.start() - for clickhouse in [node1, node2]: + for clickhouse in [node1, node2, node3]: 
clickhouse.query("CREATE DATABASE test", user="admin") clickhouse.query("CREATE TABLE test.xml_dictionary_table (id UInt64, SomeValue1 UInt8, SomeValue2 String) ENGINE = MergeTree() ORDER BY id", user="admin") clickhouse.query("INSERT INTO test.xml_dictionary_table SELECT number, number % 23, hex(number) from numbers(1000)", user="admin") @@ -161,3 +162,23 @@ def test_restricted_database(started_cluster): SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_in_restricted_db' DB 'restricted_db')) LIFETIME(MIN 1 MAX 10) """) + + +def test_conflicting_name(started_cluster): + assert node3.query("select dictGetUInt8('test.conflicting_dictionary', 'SomeValue1', toUInt64(17))") == '17\n' + + with pytest.raises(QueryRuntimeException): + node3.query(""" + CREATE DICTIONARY test.conflicting_dictionary( + id UInt64, + SomeValue1 UInt8, + SomeValue2 String + ) + PRIMARY KEY id + LAYOUT(FLAT()) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'xml_dictionary_table' DB 'test')) + LIFETIME(MIN 1 MAX 10) + """) + + # old version still works + node3.query("select dictGetUInt8('test.conflicting_dictionary', 'SomeValue1', toUInt64(17))") == '17\n' From 050de71ef47fc06caa7ce029fd9eebe83413f806 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 24 Oct 2019 12:33:45 +0300 Subject: [PATCH 107/122] Update DistributedBlockOutputStream.cpp --- dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index f295b6d4d3d..ee3ebfd9964 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -86,6 +86,7 @@ void DistributedBlockOutputStream::write(const Block & block) /* They are added by the AddingDefaultBlockOutputStream, and we 
will get * different number of columns eventually */ for (const auto & col : storage.getColumns().getMaterialized()) + { if (ordinary_block.has(col.name)) { ordinary_block.erase(col.name); @@ -93,6 +94,7 @@ void DistributedBlockOutputStream::write(const Block & block) << ": column " + col.name + " will be removed, " << "because it is MATERIALIZED"); } + } if (insert_sync) From 64f158ff28b04ece575e881e7810618fa5d05e2b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 24 Oct 2019 13:56:32 +0300 Subject: [PATCH 108/122] Fixed message for ALTER MOVE PART. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b08717e60e9..e3de86f5a78 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2730,7 +2730,13 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & if (parts.empty()) { - throw Exception("All parts of partition '" + partition_id + "' are already on disk '" + disk->getName() + "'", ErrorCodes::UNKNOWN_DISK); + String no_parts_to_move_message; + if (moving_part) + no_parts_to_move_message = "Part '" + partition_id + "' is already on disk '" + disk->getName() + "'"; + else + no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on disk '" + disk->getName() + "'"; + + throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); } if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) @@ -2776,7 +2782,13 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String if (parts.empty()) { - throw Exception("All parts of partition '" + partition_id + "' are already on volume '" + volume->getName() + "'", ErrorCodes::UNKNOWN_DISK); + String no_parts_to_move_message; + if (moving_part) + no_parts_to_move_message = "Part '" + 
partition_id + "' is already on volume '" + volume->getName() + "'"; + else + no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on volume '" + volume->getName() + "'"; + + throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); } if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) From 08788e344372995eedc731560f9b57798f7af06f Mon Sep 17 00:00:00 2001 From: rainbowsysu Date: Thu, 24 Oct 2019 20:07:10 +0800 Subject: [PATCH 109/122] fixed the given examples of nullable in zh doc --- docs/zh/data_types/nullable.md | 40 ++++++++++------------------------ 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/docs/zh/data_types/nullable.md b/docs/zh/data_types/nullable.md index 41565f9d721..67d98fa9408 100644 --- a/docs/zh/data_types/nullable.md +++ b/docs/zh/data_types/nullable.md @@ -19,37 +19,21 @@ ## 用法示例 + +```sql +CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE TinyLog ``` -:) CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE TinyLog - -CREATE TABLE t_null -( - x Int8, - y Nullable(Int8) -) -ENGINE = TinyLog - -Ok. - -0 rows in set. Elapsed: 0.012 sec. - -:) INSERT INTO t_null VALUES (1, NULL) - -INSERT INTO t_null VALUES - -Ok. - -1 rows in set. Elapsed: 0.007 sec. - -:) SELECT x + y FROM t_null - -SELECT x + y -FROM t_null - +```sql +INSERT INTO t_null VALUES (1, NULL), (2, 3) +``` +```sql +SELECT x + y FROM t_null +``` +```text ┌─plus(x, y)─┐ │ ᴺᵁᴸᴸ │ │ 5 │ └────────────┘ - -2 rows in set. Elapsed: 0.144 sec. 
``` + +[来源文章](https://clickhouse.yandex/docs/en/data_types/nullable/) \ No newline at end of file From c250db4922faa7cfa723cb8ecff9252585d6851b Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Thu, 24 Oct 2019 15:56:30 +0300 Subject: [PATCH 110/122] Update Docker Image for Binary Packager (#7474) --- docker/packager/binary/Dockerfile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index d88a2767efd..c15de71e848 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -57,20 +57,28 @@ RUN apt-get update -y \ rename \ wget -# Build and install tools for cross-linking to Darwin - ENV CC=clang-8 ENV CXX=clang++-8 # libtapi is required to support .tbh format from recent MacOS SDKs RUN git clone https://github.com/tpoechtrager/apple-libtapi.git RUN cd apple-libtapi && INSTALLPREFIX=/cctools ./build.sh && ./install.sh +RUN rm -rf apple-libtapi +# Build and install tools for cross-linking to Darwin RUN git clone https://github.com/tpoechtrager/cctools-port.git RUN cd cctools-port/cctools && ./configure --prefix=/cctools --with-libtapi=/cctools --target=x86_64-apple-darwin && make install +RUN rm -rf cctools-port +# Download toolchain for Darwin +RUN mkdir -p /build/cmake/toolchain/darwin-x86_64 RUN wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz -RUN tar xJf MacOSX10.14.sdk.tar.xz -C /cctools +RUN tar xJf MacOSX10.14.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 + +# Download toolchain for ARM +RUN mkdir -p /build/cmake/toolchain/linux-aarch64 +RUN wget "https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en" -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz +RUN tar xJf 
gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C /build/cmake/toolchain/linux-aarch64 --strip-components=1 COPY build.sh / CMD ["/bin/bash", "/build.sh"] From f2028e901d07a85404af07e855ca05fd9707fde1 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 24 Oct 2019 16:04:50 +0300 Subject: [PATCH 111/122] review related changes --- dbms/src/Interpreters/IdentifierSemantic.cpp | 12 +++++++----- dbms/src/Interpreters/IdentifierSemantic.h | 5 ----- ...mn.reference => 01018_ambiguous_column.reference} | 1 + ...biguous_column.sql => 01018_ambiguous_column.sql} | 3 +++ 4 files changed, 11 insertions(+), 10 deletions(-) rename dbms/tests/queries/0_stateless/{01018_anbiguous_column.reference => 01018_ambiguous_column.reference} (99%) rename dbms/tests/queries/0_stateless/{01018_anbiguous_column.sql => 01018_ambiguous_column.sql} (86%) diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 9930da0e699..34910ef039f 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -36,9 +36,9 @@ IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, for (size_t i = 0; i < tables.size(); ++i) { auto match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i])); - if (value(match)) + if (match != ColumnMatch::NoMatch) { - if (value(match) > value(best_match)) + if (match > best_match) { best_match = match; best_table_pos = i; @@ -49,7 +49,7 @@ IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, } } - if (value(best_match) && same_match) + if ((best_match != ColumnMatch::NoMatch) && same_match) { if (!allow_ambiguous) throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); @@ -111,13 +111,15 @@ std::optional IdentifierSemantic::getMembership(const ASTIdentifier & id bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & 
tables, size_t & best_table_pos, bool ambiguous) { - return value(tryChooseTable(identifier, tables, best_table_pos, ambiguous)); + static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch; + return tryChooseTable(identifier, tables, best_table_pos, ambiguous) != no_match; } bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, size_t & best_table_pos, bool ambiguous) { - return value(tryChooseTable(identifier, tables, best_table_pos, ambiguous)); + static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch; + return tryChooseTable(identifier, tables, best_table_pos, ambiguous) != no_match; } std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 832a3345b5a..82b5ff31dde 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -53,9 +53,4 @@ private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); }; -inline UInt32 value(IdentifierSemantic::ColumnMatch match) -{ - return static_cast(match); -} - } diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference b/dbms/tests/queries/0_stateless/01018_ambiguous_column.reference similarity index 99% rename from dbms/tests/queries/0_stateless/01018_anbiguous_column.reference rename to dbms/tests/queries/0_stateless/01018_ambiguous_column.reference index 90b24009d0f..a2a1d6ea4f6 100644 --- a/dbms/tests/queries/0_stateless/01018_anbiguous_column.reference +++ b/dbms/tests/queries/0_stateless/01018_ambiguous_column.reference @@ -10,3 +10,4 @@ ┌─A.dummy─┬─one.dummy─┬─two.dummy─┐ │ 0 │ 0 │ 0 │ └─────────┴───────────┴───────────┘ +0 diff --git a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql b/dbms/tests/queries/0_stateless/01018_ambiguous_column.sql similarity index 86% rename from 
dbms/tests/queries/0_stateless/01018_anbiguous_column.sql rename to dbms/tests/queries/0_stateless/01018_ambiguous_column.sql index ab291178f87..54603aab810 100644 --- a/dbms/tests/queries/0_stateless/01018_anbiguous_column.sql +++ b/dbms/tests/queries/0_stateless/01018_ambiguous_column.sql @@ -22,3 +22,6 @@ SELECT * from one A JOIN system.one one ON A.dummy = one.dummy JOIN system.one two ON A.dummy = two.dummy FORMAT PrettyCompact; + +-- SELECT one.dummy FROM one AS A FULL JOIN (SELECT 0 AS dymmy) AS one USING dummy; +SELECT one.dummy FROM one AS A JOIN (SELECT 0 AS dummy) B USING dummy; From 8e5a6cd2060311ccb96fa9a6967cde2ae3e8d796 Mon Sep 17 00:00:00 2001 From: rainbowsysu Date: Thu, 24 Oct 2019 21:22:50 +0800 Subject: [PATCH 112/122] update trainslation of fixedstring in zh --- docs/zh/data_types/fixedstring.md | 60 +++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/docs/zh/data_types/fixedstring.md b/docs/zh/data_types/fixedstring.md index 050f0a582a2..18ed3ed45bc 100644 --- a/docs/zh/data_types/fixedstring.md +++ b/docs/zh/data_types/fixedstring.md @@ -1,9 +1,55 @@ -# FixedString(N) +# FixedString -固定长度 N 的字符串。N 必须是严格的正自然数。 -当服务端读取长度小于 N 的字符串时候(例如解析 INSERT 数据时),通过在字符串末尾添加空字节来达到 N 字节长度。 -当服务端读取长度大于 N 的字符串时候,将返回错误消息。 -当服务器写入一个字符串(例如,当输出 SELECT 查询的结果)时,NULL字节不会从字符串的末尾被移除,而是被输出。 -注意这种方式与 MYSQL 的 CHAR 类型是不一样的(MYSQL 的字符串会以空格填充,然后输出的时候空格会被修剪)。 +固定长度 N 的字符串(N 必须是严格的正自然数)。 -与 `String` 类型相比,极少的函数会使用 `FixedString(N)`,因此使用起来不太方便。 +您可以使用下面的语法对列声明为`FixedString`类型: + +```sql + FixedString(N) +``` + +其中`N`表示自然数。 + +当数据的长度恰好为N个字节时,`FixedString`类型是高效的。 在其他情况下,这可能会降低效率。 + +可以有效存储在`FixedString`类型的列中的值的示例: + +- 二进制表示的IP地址(IPv6使用`FixedString(16)`) +- 语言代码(ru_RU, en_US ... ) +- 货币代码(USD, RUB ... 
) +- 二进制表示的哈希值(MD5使用`FixedString(16)`,SHA256使用`FixedString(32)`) + +请使用[UUID](uuid.md)数据类型来存储UUID值,。 + +当向ClickHouse中插入数据时, + +- 如果字符串包含的字节数少于`N',将对字符串末尾进行空字节填充。 +- 如果字符串包含的字节数大于`N`,将抛出`Too large value for FixedString(N)`异常。 + +当做数据查询时,ClickHouse不会删除字符串末尾的空字节。 如果使用`WHERE`子句,则须要手动添加空字节以匹配`FixedString`的值。 以下示例阐明了如何将`WHERE`子句与`FixedString`一起使用。 + +考虑带有`FixedString(2)`列的表: + +```text +┌─name──┐ +│ b │ +└───────┘ +``` + +查询语句`SELECT * FROM FixedStringTable WHERE a = 'b'` 不会返回任何结果。请使用空字节来填充筛选条件。 + +```sql +SELECT * FROM FixedStringTable +WHERE a = 'b\0' +``` +```text +┌─a─┐ +│ b │ +└───┘ +``` + +这种方式与MySQL的`CHAR`类型的方式不同(MySQL中使用空格填充字符串,并在输出时删除空格)。 + +请注意,`FixedString(N)`的长度是个常量。仅由空字符组成的字符串,函数[length](../query_language/functions/array_functions.md#array_functions-length)返回值为`N`,而函数[empty](../query_language/functions/string_functions.md#string_functions-empty)的返回值为`1`。 + +[来源文章](https://clickhouse.yandex/docs/en/data_types/fixedstring/) From 8755f5ffa91020b1ab1fd55700bfa8dc7721fb1d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 24 Oct 2019 23:08:36 +0300 Subject: [PATCH 113/122] Add missed config --- .../dictionary_with_conflict_name.xml | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/dictionary_with_conflict_name.xml diff --git a/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/dictionary_with_conflict_name.xml b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/dictionary_with_conflict_name.xml new file mode 100644 index 00000000000..75e6f8953eb --- /dev/null +++ b/dbms/tests/integration/test_dictionaries_ddl/configs/dictionaries/dictionary_with_conflict_name.xml @@ -0,0 +1,41 @@ + + + test.conflicting_dictionary + + + localhost + 9000 + default + + test + xml_dictionary_table
+
+ + + + 0 + 0 + + + + 128 + + + + + id + + + SomeValue1 + UInt8 + 1 + + + + SomeValue2 + String + '' + + +
+
From 7e5b05bbe8b9ffa7b0c38b90eff5211724aa0fe7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 24 Oct 2019 23:55:41 +0300 Subject: [PATCH 114/122] Revert "Remove hardcoded paths in unwind target" --- contrib/libunwind-cmake/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 7901a990b85..f09d0979692 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -30,4 +30,9 @@ target_include_directories(unwind SYSTEM BEFORE PUBLIC $ Date: Fri, 25 Oct 2019 11:37:22 +0200 Subject: [PATCH 115/122] remove some obsolete notes about mutations --- docs/en/query_language/alter.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index b7b37924c71..7a3130ef30f 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -389,7 +389,7 @@ Possible values: `0` – do not wait; `1` – only wait for own execution (defau Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. -The functionality is in beta stage and is available starting with the 1.1.54388 version. Currently `*MergeTree` table engines are supported (both replicated and unreplicated). +Currently `*MergeTree` table engines are supported (both replicated and unreplicated). Existing tables are ready for mutations as-is (no conversion necessary), but after the first mutation is applied to a table, its metadata format becomes incompatible with previous server versions and falling back to a previous version becomes impossible. @@ -405,7 +405,7 @@ The `filter_expr` must be of type UInt8. 
The query deletes rows in the table for ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr ``` -The command is available starting with the 18.12.14 version. The `filter_expr` must be of type UInt8. This query updates values of specified columns to the values of corresponding expressions in rows for which the `filter_expr` takes a non-zero value. Values are casted to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary or the partition key is not supported. +The `filter_expr` must be of type UInt8. This query updates values of specified columns to the values of corresponding expressions in rows for which the `filter_expr` takes a non-zero value. Values are casted to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary or the partition key is not supported. ```sql ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name From cbecfd243226ee907b82097b4ed8f66e40f26186 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Fri, 25 Oct 2019 13:04:07 +0200 Subject: [PATCH 116/122] Fixing 'note' markdown --- docs/en/query_language/alter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 7a3130ef30f..f275a908fd9 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -294,7 +294,7 @@ This query creates a local backup of a specified partition. If the `PARTITION` c Note that for old-styled tables you can specify the prefix of the partition name (for example, '2019') - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). !!! note - The entire backup process is performed without stopping the server. 
+ The entire backup process is performed without stopping the server. At the time of execution, for a data snapshot, the query creates hardlinks to a table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where: From 2d2e738085f71731eab61571f592b3d4fcebfdc2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 20 Oct 2019 23:04:52 +0300 Subject: [PATCH 117/122] Add CRC32IEEE()/CRC64() support zlib's implementation uses CRC-32-IEEE 802.3 polynomial (0xedb88320) but with starting value 0xffffffff, so introduce another crc32 implementation - CRC32IEEE that has starting value - 0 Also add CRC64 with ECMA polynomial. v2: s/crc*_data./crc*_data./ to avoid conflicts with other crc32.h in contrib v3: join with existing CRC32() --- dbms/src/Functions/CRC.cpp | 146 ++++++++++++++++++ dbms/src/Functions/CRC32.cpp | 68 -------- .../src/Functions/registerFunctionsString.cpp | 4 +- ...eference => 00936_crc_functions.reference} | 4 + ...2_function.sql => 00936_crc_functions.sql} | 5 + .../functions/string_functions.md | 15 +- .../functions/string_functions.md | 15 +- 7 files changed, 185 insertions(+), 72 deletions(-) create mode 100644 dbms/src/Functions/CRC.cpp delete mode 100644 dbms/src/Functions/CRC32.cpp rename dbms/tests/queries/0_stateless/{00936_crc32_function.reference => 00936_crc_functions.reference} (87%) rename dbms/tests/queries/0_stateless/{00936_crc32_function.sql => 00936_crc_functions.sql} (88%) diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp new file mode 100644 index 00000000000..38462ff8884 --- /dev/null +++ b/dbms/src/Functions/CRC.cpp @@ -0,0 +1,146 @@ +#include +#include +#include +#include + +namespace +{ + +template +struct CRCBase +{ + T tab[256]; + CRCBase(T polynomial) + { + for (size_t i = 0; i < 256; ++i) + { + T c = i; + for (size_t j = 0; j < 8; ++j) + { + c = c & 1 ? 
polynomial ^ (c >> 1) : c >> 1; + } + tab[i] = c; + } + } +}; + +template +struct CRCImpl +{ + using ReturnType = T; + + static T make_crc(const unsigned char *buf, size_t size) + { + static CRCBase base(polynomial); + + T i, crc; + + crc = 0; + for (i = 0; i < size; i++) + { + crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); + } + return crc; + } +}; + +static constexpr UInt64 CRC64_ECMA = 0xc96c5795d7870f42ULL; +struct CRC64ECMAImpl : public CRCImpl +{ + static constexpr auto name = "CRC64"; +}; + +static constexpr UInt32 CRC32_IEEE = 0xedb88320; +struct CRC32IEEEImpl : public CRCImpl +{ + static constexpr auto name = "CRC32IEEE"; +}; + +struct CRC32ZLIBImpl +{ + using ReturnType = UInt32; + static constexpr auto name = "CRC32"; + + static UInt32 make_crc(const unsigned char *buf, size_t size) + { return crc32(0L, buf, size); } +}; + +} // \anonymous + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +struct CRCFunctionWrapper +{ + static constexpr auto is_fixed_to_constant = true; + using ReturnType = typename Impl::ReturnType; + + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = do_crc(data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } + } + + static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = do_crc(data, 0, n); } + + static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + { + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + { + res[i] = do_crc(data, i * n, n); + } + } + + [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array 
argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + static ReturnType do_crc(const ColumnString::Chars & buf, size_t offset, size_t size) + { + const unsigned char * p = reinterpret_cast(&buf[0]) + offset; + return Impl::make_crc(p, size); + } +}; + +template +using FunctionCRC = FunctionStringOrArrayToT, T, typename T::ReturnType>; +// The same as IEEE variant, but uses 0xffffffff as initial value +// This is the default +// +// (And zlib is used here, since it has optimized version) +using FunctionCRC32ZLIB = FunctionCRC; +// Uses CRC-32-IEEE 802.3 polynomial +using FunctionCRC32IEEE = FunctionCRC; +// Uses CRC-64-ECMA polynomial +using FunctionCRC64ECMA = FunctionCRC; + +template +void registerFunctionCRCImpl(FunctionFactory & factory) +{ + factory.registerFunction(T::name, FunctionFactory::CaseInsensitive); +} + +void registerFunctionCRC(FunctionFactory & factory) +{ + registerFunctionCRCImpl(factory); + registerFunctionCRCImpl(factory); + registerFunctionCRCImpl(factory); +} + +} diff --git a/dbms/src/Functions/CRC32.cpp b/dbms/src/Functions/CRC32.cpp deleted file mode 100644 index 80e0f163571..00000000000 --- a/dbms/src/Functions/CRC32.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -/** Calculates the CRC32 of a string - */ -struct CRC32Impl -{ - static constexpr auto is_fixed_to_constant = true; - - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) - { - size_t size = offsets.size(); - - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - res[i] = do_crc32(data, prev_offset, offsets[i] - prev_offset - 1); - prev_offset = offsets[i]; - } - } - - static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, UInt32 & res) { res = do_crc32(data, 0, n); } - - static void vector_fixed_to_vector(const 
ColumnString::Chars & data, size_t n, PaddedPODArray & res) - { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { - res[i] = do_crc32(data, i * n, n); - } - } - - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) - { - throw Exception("Cannot apply function CRC32 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - -private: - static uint32_t do_crc32(const ColumnString::Chars & buf, size_t offset, size_t size) - { - const unsigned char * p = reinterpret_cast(&buf[0]) + offset; - return crc32(0L, p, size); - } -}; - -struct NameCRC32 -{ - static constexpr auto name = "CRC32"; -}; -using FunctionCRC32 = FunctionStringOrArrayToT; - -void registerFunctionCRC32(FunctionFactory & factory) -{ - factory.registerFunction(NameCRC32::name, FunctionFactory::CaseInsensitive); -} - -} diff --git a/dbms/src/Functions/registerFunctionsString.cpp b/dbms/src/Functions/registerFunctionsString.cpp index 1f4219b18f2..df407750d35 100644 --- a/dbms/src/Functions/registerFunctionsString.cpp +++ b/dbms/src/Functions/registerFunctionsString.cpp @@ -20,7 +20,7 @@ void registerFunctionReverseUTF8(FunctionFactory &); void registerFunctionsConcat(FunctionFactory &); void registerFunctionFormat(FunctionFactory &); void registerFunctionSubstring(FunctionFactory &); -void registerFunctionCRC32(FunctionFactory &); +void registerFunctionCRC(FunctionFactory &); void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &); void registerFunctionEndsWith(FunctionFactory &); @@ -47,7 +47,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionLowerUTF8(factory); registerFunctionUpperUTF8(factory); registerFunctionReverse(factory); - registerFunctionCRC32(factory); + registerFunctionCRC(factory); registerFunctionReverseUTF8(factory); registerFunctionsConcat(factory); registerFunctionFormat(factory); diff --git 
a/dbms/tests/queries/0_stateless/00936_crc32_function.reference b/dbms/tests/queries/0_stateless/00936_crc_functions.reference similarity index 87% rename from dbms/tests/queries/0_stateless/00936_crc32_function.reference rename to dbms/tests/queries/0_stateless/00936_crc_functions.reference index 90c6a41551b..1431a2e654b 100644 --- a/dbms/tests/queries/0_stateless/00936_crc32_function.reference +++ b/dbms/tests/queries/0_stateless/00936_crc_functions.reference @@ -20,3 +20,7 @@ qwerty string 55151997 2663297705 qqq aaa 3142898280 4027020077 zxcqwer 3358319860 0 aasq xxz 3369829874 4069886758 +CRC32IEEE() +7332BC33 +CRC64() +72D5B9EA0B70CE1E diff --git a/dbms/tests/queries/0_stateless/00936_crc32_function.sql b/dbms/tests/queries/0_stateless/00936_crc_functions.sql similarity index 88% rename from dbms/tests/queries/0_stateless/00936_crc32_function.sql rename to dbms/tests/queries/0_stateless/00936_crc_functions.sql index 1bc9d9ec246..fd324ea23fa 100644 --- a/dbms/tests/queries/0_stateless/00936_crc32_function.sql +++ b/dbms/tests/queries/0_stateless/00936_crc_functions.sql @@ -18,3 +18,8 @@ select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2); select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2); DROP TABLE table1; + +SELECT 'CRC32IEEE()'; +SELECT hex(CRC32IEEE('foo')); +SELECT 'CRC64()'; +SELECT hex(CRC64('foo')); diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 02a8e1d64aa..32186bfb74e 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -195,7 +195,20 @@ Returns a string that removes the whitespace characters on either side. ## CRC32(s) -Returns the CRC32 checksum of a string +Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). + The result type is UInt32. 
+## CRC32IEEE(s) + +Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial. + +The result type is UInt32. + +## CRC64(s) + +Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial. + +The result type is UInt64. + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index 193da6f2753..e6753247ea9 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -155,7 +155,20 @@ SELECT startsWith('Hello, world!', 'He'); ## CRC32(s) -Возвращает чексумму CRC32 данной строки. +Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен и начальным значением `0xffffffff` (т.к. используется реализация из zlib). + Тип результата - UInt32. +## CRC32IEEE(s) + +Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен. + +Тип результата - UInt32. + +## CRC64(s) + +Возвращает чексумму CRC64 данной строки, используется CRC-64-ECMA многочлен. + +Тип результата - UInt64. 
+ [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) From 8e48430159762345e2e9d221d997803b5bae529d Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 25 Oct 2019 21:49:49 +0400 Subject: [PATCH 118/122] Add handling of SQL_TINYINT and SQL_BIGINT Fix handling of SQL_FLOAT --- dbms/programs/odbc-bridge/ColumnInfoHandler.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp index b188c0bea88..594cddfd3db 100644 --- a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -38,12 +38,16 @@ namespace switch (type) { + case SQL_TINYINT: + return factory.get("Int8"); case SQL_INTEGER: return factory.get("Int32"); case SQL_SMALLINT: return factory.get("Int16"); + case SQL_BIGINT: + return factory.get("Int64"); case SQL_FLOAT: - return factory.get("Float32"); + return factory.get("Float64"); case SQL_REAL: return factory.get("Float32"); case SQL_DOUBLE: From 04f1e6b2cc2dd544677200259fd86170a3b324ae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 25 Oct 2019 23:32:29 +0300 Subject: [PATCH 119/122] Use crc32_z() over crc32(), since it size_t for length --- dbms/src/Functions/CRC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp index 38462ff8884..e506812d94c 100644 --- a/dbms/src/Functions/CRC.cpp +++ b/dbms/src/Functions/CRC.cpp @@ -62,7 +62,7 @@ struct CRC32ZLIBImpl static constexpr auto name = "CRC32"; static UInt32 make_crc(const unsigned char *buf, size_t size) - { return crc32(0L, buf, size); } + { return crc32_z(0L, buf, size); } }; } // \anonymous From fe98e90d0dfdb6687b116f5df593d7954ae9dc21 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 25 Oct 2019 23:31:56 +0300 Subject: [PATCH 120/122] Avoid conflicts of static zlib and mariadb-connector-c zlib After 
crc32() had been replaced with crc32_z() the following error will happen with two different zlib: 2019-10-25 09:48:42 /usr/bin/ld.gold: error: contrib/zlib-ng/libz.a(crc32.c.o): multiple definition of 'get_crc_table' 2019-10-25 09:48:42 /usr/bin/ld.gold: contrib/mariadb-connector-c/libmariadb/libmariadbclient.a(crc32.c.o): previous definition here Fix this by using zlibstatic compiled for and by CH in mariadb-connector-c, and wrap into function reduce variable scopes. --- contrib/CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 06c33fb7e74..b0a271b21ac 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY) add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY}) endif () -if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) +function(mysql_support) set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC) set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC) set(CLIENT_PLUGIN_REMOTE_IO OFF) @@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) if (GLIBC_COMPATIBILITY) set(LIBM glibc-compatibility) endif() + if (USE_INTERNAL_ZLIB_LIBRARY) + set(ZLIB_FOUND ON) + set(ZLIB_LIBRARY zlibstatic) + set(WITH_EXTERNAL_ZLIB ON) + endif() add_subdirectory (mariadb-connector-c) +endfunction() +if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) + mysql_support() endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) From 0cdd7ca6b9ed769bcfa3137fb65b03427d5b8dec Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 24 Oct 2019 01:35:12 +0200 Subject: [PATCH 121/122] Add user setting mark_cache_min_lifetime documentation --- docs/en/operations/server_settings/settings.md | 5 ++++- docs/en/operations/settings/settings.md | 6 ++++++ docs/ru/operations/server_settings/settings.md | 5 ++++- docs/ru/operations/settings/settings.md | 6 ++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 436e0bdad8a..56151911f50 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -366,12 +366,15 @@ For more information, see the section "[Creating replicated tables](../../operat ``` -## mark_cache_size +## mark_cache_size {#server-mark-cache-size} Approximate size (in bytes) of the cache of "marks" used by [MergeTree](../../operations/table_engines/mergetree.md). The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120. +!!! note IMPORTANT + This parameter could be exceeded by user's setting [mark_cache_min_lifetime](../settings/settings.md#settings-mark_cache_min_lifetime). + **Example** ```xml diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 53b50931ec2..159d0cbe7ff 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -570,6 +570,12 @@ We are writing a URL column with the String type (average size of 60 bytes per v There usually isn't any reason to change this setting. +## mark_cache_min_lifetime {#settings-mark_cache_min_lifetime} + +If the value of [mark_cache_size](../server_settings/settings.md#server-mark-cache-size) setting is exceeded, delete only records older than mark_cache_min_lifetime seconds. If your hosts have low amount of RAM, it makes sense to lower this parameter. + +Default value: 10000 seconds. 
+ ## max_query_size {#settings-max_query_size} The maximum part of a query that can be taken to RAM for parsing with the SQL parser. diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index e72d97bdc46..f95b0809650 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -366,12 +366,15 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## mark_cache_size +## mark_cache_size {#server-mark-cache-size} Приблизительный размер (в байтах) кеша "засечек", используемых движками таблиц семейства [MergeTree](../../operations/table_engines/mergetree.md). Кеш общий для сервера, память выделяется по мере необходимости. Кеш не может быть меньше, чем 5368709120. +!!! note ВАЖНО + Этот параметр может быть превышен при большом значении настройки пользователя [mark_cache_min_lifetime](../settings/settings.md#settings-mark_cache_min_lifetime). + **Пример** ```xml diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ba4f07cda3b..d37e0911698 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -511,6 +511,12 @@ ClickHouse использует этот параметр при чтении д Как правило, не имеет смысла менять эту настройку. +## mark_cache_min_lifetime {#settings-mark_cache_min_lifetime} + +Если превышено значение параметра [mark_cache_size](../server_settings/settings.md#server-mark-cache-size), то будут удалены только записи старше чем значение этого параметра. Имеет смысл понижать данный параметр при малом количестве RAM на хост-системах. + +Default value: 10000 seconds. + ## max_query_size {#settings-max_query_size} Максимальный кусок запроса, который будет считан в оперативку для разбора парсером языка SQL. 
From 77bdfb33507e3621b613c0213cb533b0e159bc8d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 27 Oct 2019 22:10:54 +0800 Subject: [PATCH 122/122] Fix HAVING without GROUP BY --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 45 +++++++++++-------- .../01020_having_without_group_by.reference | 1 + .../01020_having_without_group_by.sql | 1 + 3 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01020_having_without_group_by.reference create mode 100644 dbms/tests/queries/0_stateless/01020_having_without_group_by.sql diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 10b5e5483ee..e0d9ec970f6 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -266,11 +266,36 @@ const std::unordered_set possibly_injective_function_names "dictGetDateTime" }; +/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation. + * Instead, leave `GROUP BY const`. + * Next, see deleting the constants in the analyzeAggregation method. + */ +void appendUnusedGroupByColumn(ASTSelectQuery * select_query, const NameSet & source_columns) +{ + /// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens. + UInt64 unused_column = 0; + String unused_column_name = toString(unused_column); + + while (source_columns.count(unused_column_name)) + { + ++unused_column; + unused_column_name = toString(unused_column); + } + + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared()); + select_query->groupBy()->children.emplace_back(std::make_shared(UInt64(unused_column))); +} + /// Eliminates injective function calls and constant expressions from group by statement. 
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { if (!select_query->groupBy()) + { + // If there is a HAVING clause without GROUP BY, make sure we have some aggregation happen. + if (select_query->having()) + appendUnusedGroupByColumn(select_query, source_columns); return; + } const auto is_literal = [] (const ASTPtr & ast) -> bool { @@ -345,25 +370,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum } if (group_exprs.empty()) - { - /** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation. - * Instead, leave `GROUP BY const`. - * Next, see deleting the constants in the analyzeAggregation method. - */ - - /// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens. - UInt64 unused_column = 0; - String unused_column_name = toString(unused_column); - - while (source_columns.count(unused_column_name)) - { - ++unused_column; - unused_column_name = toString(unused_column); - } - - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared<ASTExpressionList>()); - select_query->groupBy()->children.emplace_back(std::make_shared<ASTLiteral>(UInt64(unused_column))); - } + appendUnusedGroupByColumn(select_query, source_columns); } /// Remove duplicate items from ORDER BY. 
diff --git a/dbms/tests/queries/0_stateless/01020_having_without_group_by.reference b/dbms/tests/queries/0_stateless/01020_having_without_group_by.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01020_having_without_group_by.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/01020_having_without_group_by.sql b/dbms/tests/queries/0_stateless/01020_having_without_group_by.sql new file mode 100644 index 00000000000..cf9b59b35bd --- /dev/null +++ b/dbms/tests/queries/0_stateless/01020_having_without_group_by.sql @@ -0,0 +1 @@ +SELECT 1 HAVING 1;