From e42022adce7ad17f6feef2587b84ad404c6cbf05 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 30 Sep 2015 16:35:25 +0300 Subject: [PATCH 01/39] dbms: allow CREATE TEMPORARY TABLE AS [#METR-18251] --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 82a063166af..01aea41fc95 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -244,9 +244,7 @@ BlockIO InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) } if (create.is_temporary) - { context.getSessionContext().addExternalTable(table_name, res); - } else context.addTable(database_name, table_name, res); } @@ -261,8 +259,13 @@ BlockIO InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) new ProhibitColumnsBlockOutputStream{ new AddingDefaultBlockOutputStream{ new MaterializingBlockOutputStream{ - new PushingToViewsBlockOutputStream{create.database, create.table, context, query_ptr} + new PushingToViewsBlockOutputStream{ + create.database, create.table, + create.is_temporary ? context.getSessionContext() : context, + query_ptr + } }, + /// @note shouldn't these two contexts be session contexts in case of temporary table? columns, column_defaults, context, context.getSettingsRef().strict_insert_defaults }, materialized_columns From 3a8d096be227e67e2491083aa9d6dee4959324a8 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 30 Sep 2015 16:43:25 +0300 Subject: [PATCH 02/39] dbms: Server: Added functional test + comment. [#METR-18059] --- dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp | 3 ++- .../00234_disjunctive_equality_chains_optimization.reference | 0 .../00234_disjunctive_equality_chains_optimization.sql | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference create mode 100644 dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.sql diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp index b53a25909ce..1ac667fb209 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -197,7 +197,8 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain value_list->children.push_back(operands[1]); } - /// Отсортировать литералы. + /// Отсортировать литералы, чтобы они были указаны в одном и том же порядке в выражении IN. + /// Иначе они указывались бы в порядке адресов ASTLiteral, который недетерминирован. 
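        /// For example, the chain (id = 3 OR id = 1 OR id = 2) from the test added below should always
        /// produce the same rewritten form, such as id IN (1, 2, 3), rather than an order that depends
        /// on where the ASTLiteral objects happen to be allocated.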
std::sort(value_list->children.begin(), value_list->children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs) { const auto val_lhs = static_cast(&*lhs); diff --git a/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference b/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.sql b/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.sql new file mode 100644 index 00000000000..10a25cfeea0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.sql @@ -0,0 +1,4 @@ +CREATE TABLE IF NOT EXISTS test.foo(id UInt64) Engine=Memory; +INSERT INTO test.foo(id) VALUES (0),(4),(1),(1),(3),(1),(1),(2),(2),(2),(1),(2),(3),(2),(1),(1),(2),(1),(1),(1),(3),(1),(2),(2),(1),(1),(3),(1),(2),(1),(1),(3),(2),(1),(1),(4),(0); +SELECT sum(id = 3 OR id = 1 OR id = 2) AS x, sum(id = 3 OR id = 1 OR id = 2) AS x FROM test.foo; +DROP TABLE test.foo; From b8e3d35f928e61f98a07486176636cdc9d7aa8f2 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 30 Sep 2015 16:56:48 +0300 Subject: [PATCH 03/39] dbms: add test [#METR-18251] --- .../0_stateless/00234_create_temporary_table_as.reference | 1 + .../queries/0_stateless/00234_create_temporary_table_as.sql | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference create mode 100644 dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql diff --git a/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference b/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql b/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql new file mode 100644 index 00000000000..9057152e698 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql @@ -0,0 +1,2 @@ +create temporary table one as select 1; +select * from one; From dc087ac04d87b5f517b20ba2ea1032ca1ab79a08 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 30 Sep 2015 16:57:40 +0300 Subject: [PATCH 04/39] dbms: rename test 00234 to 00235[#METR-18251] --- ...ble_as.reference => 00235_create_temporary_table_as.reference} | 0 ...temporary_table_as.sql => 00235_create_temporary_table_as.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00234_create_temporary_table_as.reference => 00235_create_temporary_table_as.reference} (100%) rename dbms/tests/queries/0_stateless/{00234_create_temporary_table_as.sql => 00235_create_temporary_table_as.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference b/dbms/tests/queries/0_stateless/00235_create_temporary_table_as.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00234_create_temporary_table_as.reference rename to dbms/tests/queries/0_stateless/00235_create_temporary_table_as.reference diff --git a/dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql b/dbms/tests/queries/0_stateless/00235_create_temporary_table_as.sql similarity index 100% rename from 
dbms/tests/queries/0_stateless/00234_create_temporary_table_as.sql rename to dbms/tests/queries/0_stateless/00235_create_temporary_table_as.sql From f7251168267d6a4a0b08d28ad0e0561f9aa48783 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Oct 2015 04:35:18 +0300 Subject: [PATCH 05/39] dbms: fixed error [#METR-18249] [#METR-17924]. --- dbms/src/AggregateFunctions/AggregateFunctionsUniq.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionsUniq.cpp b/dbms/src/AggregateFunctions/AggregateFunctionsUniq.cpp index c3b23a8f36b..a6bf03abb4c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionsUniq.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionsUniq.cpp @@ -39,16 +39,16 @@ AggregateFunctionPtr createAggregateFunctionUniqExact(const std::string & name, const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res = createWithNumericType(*argument_types[0]); + AggregateFunctionPtr res = createWithNumericType(*argument_types[0]); if (res) return res; else if (typeid_cast(&argument_type)) - return new AggregateFunctionUniq; + return new AggregateFunctionUniq >; else if (typeid_cast(&argument_type)) - return new AggregateFunctionUniq; + return new AggregateFunctionUniq >; else if (typeid_cast(&argument_type) || typeid_cast(&argument_type)) - return new AggregateFunctionUniq; + return new AggregateFunctionUniq >; else throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From d339ad00482c4d2cb6359cea2cf7768f14f79030 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Oct 2015 06:30:50 +0300 Subject: [PATCH 06/39] Merge --- dbms/src/Storages/StorageChunkMerger.cpp | 3 ++- dbms/src/Storages/StorageMerge.cpp | 3 ++- dbms/src/Storages/StorageStripeLog.cpp | 34 ++++++++++++++++++------ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/StorageChunkMerger.cpp b/dbms/src/Storages/StorageChunkMerger.cpp index 78d88eb8fb9..2ed96263139 100644 --- a/dbms/src/Storages/StorageChunkMerger.cpp +++ b/dbms/src/Storages/StorageChunkMerger.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -200,7 +201,7 @@ BlockInputStreams StorageChunkMerger::read( processed_stage = tmp_processed_stage; } - return res; + return narrowBlockInputStreams(res, threads); } /// Построить блок состоящий только из возможных значений виртуальных столбцов diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 48f46d98335..3dd1526264c 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -179,7 +180,7 @@ BlockInputStreams StorageMerge::read( processed_stage = tmp_processed_stage; } - return res; + return narrowBlockInputStreams(res, threads); } /// Построить блок состоящий только из возможных значений виртуальных столбцов diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index ecf966e9d01..aecfae7fb57 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -40,13 +40,13 @@ class StripeLogBlockInputStream : public IProfilingBlockInputStream { public: StripeLogBlockInputStream(const NameSet & column_names_, StorageStripeLog & storage_, size_t max_read_buffer_size_, - const Poco::SharedPtr & index_, + std::shared_ptr & index_, 
IndexForNativeFormat::Blocks::const_iterator index_begin_, IndexForNativeFormat::Blocks::const_iterator index_end_) : column_names(column_names_.begin(), column_names_.end()), storage(storage_), index(index_), index_begin(index_begin_), index_end(index_end_), - data_in(storage.full_path() + "data.bin", 0, 0, max_read_buffer_size_), - block_in(data_in, 0, true, index_begin, index_end) + data_in(std::make_unique(storage.full_path() + "data.bin", 0, 0, max_read_buffer_size_)), + block_in(std::make_unique(*data_in, 0, true, index_begin, index_end)) { } @@ -64,19 +64,37 @@ public: protected: Block readImpl() override { - return block_in.read(); + Block res; + + if (block_in) + { + res = block_in->read(); + + /// Освобождаем память раньше уничтожения объекта. + if (!res) + { + block_in.reset(); + data_in.reset(); + index.reset(); + } + } + + return res; } private: NameSet column_names; StorageStripeLog & storage; - const Poco::SharedPtr index; + std::shared_ptr index; IndexForNativeFormat::Blocks::const_iterator index_begin; IndexForNativeFormat::Blocks::const_iterator index_end; - CompressedReadBufferFromFile data_in; - NativeBlockInputStream block_in; + /** unique_ptr - чтобы удалять объекты (освобождать буферы) после исчерпания источника + * - для экономии оперативки при использовании большого количества источников. + */ + std::unique_ptr data_in; + std::unique_ptr block_in; }; @@ -216,7 +234,7 @@ BlockInputStreams StorageStripeLog::read( NameSet column_names_set(column_names.begin(), column_names.end()); CompressedReadBufferFromFile index_in(full_path() + "index.mrk", 0, 0, INDEX_BUFFER_SIZE); - Poco::SharedPtr index = new IndexForNativeFormat(index_in, column_names_set); + std::shared_ptr index{std::make_shared(index_in, column_names_set)}; BlockInputStreams res; From 700e545ebf814e192c37e3c2d2b58bad18255c5c Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 1 Oct 2015 12:03:14 +0300 Subject: [PATCH 07/39] dbms: Server: Fixed reference file for functional test. 
[#METR-18059] --- .../00234_disjunctive_equality_chains_optimization.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference b/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference index e69de29bb2d..7a7a63dd50f 100644 --- a/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference +++ b/dbms/tests/queries/0_stateless/00234_disjunctive_equality_chains_optimization.reference @@ -0,0 +1 @@ +33 33 From ddf5996c44861a67c8ac1f4cf102e674476cb031 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 1 Oct 2015 18:10:41 +0300 Subject: [PATCH 08/39] Merge --- dbms/include/DB/Core/ErrorCodes.h | 1 + dbms/include/DB/Interpreters/Context.h | 16 +- dbms/include/DB/Interpreters/Users.h | 31 ++ dbms/src/Interpreters/Context.cpp | 35 ++- .../InterpreterShowTablesQuery.cpp | 7 +- dbms/src/Interpreters/tests/users.cpp | 287 ++++++++++++++++++ 6 files changed, 370 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Interpreters/tests/users.cpp diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 037c2d6fa1e..5d11d9dd558 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -293,6 +293,7 @@ namespace ErrorCodes DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED = 288, REPLICA_IS_NOT_IN_QUORUM = 289, LIMIT_EXCEEDED = 290, + DATABASE_ACCESS_DENIED = 291, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/include/DB/Interpreters/Context.h b/dbms/include/DB/Interpreters/Context.h index 01cd2a82cba..09731b0c37c 100644 --- a/dbms/include/DB/Interpreters/Context.h +++ b/dbms/include/DB/Interpreters/Context.h @@ -138,8 +138,14 @@ public: bool isTableExist(const String & database_name, const String & table_name) const; bool isDatabaseExist(const String & database_name) const; void assertTableExists(const String & database_name, const String & table_name) const; - void assertTableDoesntExist(const String & database_name, const String & table_name) const; - void assertDatabaseExists(const String & database_name) const; + + /** Параметр check_database_access_rights существует, чтобы не проверить повторно права доступа к БД, + * когда assertTableDoesnExist или assertDatabaseExists вызывается внутри другой функции, которая уже + * сделала эту проверку. + */ + void assertTableDoesntExist(const String & database_name, const String & table_name, bool check_database_acccess_rights = true) const; + void assertDatabaseExists(const String & database_name, bool check_database_acccess_rights = true) const; + void assertDatabaseDoesntExist(const String & database_name) const; Tables getExternalTables() const; @@ -266,6 +272,12 @@ public: void shutdown(); private: + /** Проверить, имеет ли текущий клиент доступ к заданной базе данных. + * Если доступ запрещён, кинуть исключение. + * NOTE: Этот метод надо всегда вызывать при захваченном мьютексе shared->mutex. 
+ */ + void checkDatabaseAccessRights(const std::string & database_name) const; + const Dictionaries & getDictionariesImpl(bool throw_on_error) const; const ExternalDictionaries & getExternalDictionariesImpl(bool throw_on_error) const; diff --git a/dbms/include/DB/Interpreters/Users.h b/dbms/include/DB/Interpreters/Users.h index 04359accba6..b5504b5be81 100644 --- a/dbms/include/DB/Interpreters/Users.h +++ b/dbms/include/DB/Interpreters/Users.h @@ -23,6 +23,7 @@ #include +#include namespace DB { @@ -285,6 +286,10 @@ struct User AddressPatterns addresses; + /// Список разрешённых баз данных. + using DatabaseSet = std::unordered_set; + DatabaseSet databases; + User(const String & name_, const String & config_elem, Poco::Util::AbstractConfiguration & config) : name(name_) { @@ -312,6 +317,21 @@ struct User quota = config.getString(config_elem + ".quota"); addresses.addFromConfig(config_elem + ".networks", config); + + /// Заполнить список разрешённых баз данных. + const auto config_sub_elem = config_elem + ".allow_databases"; + if (config.has(config_sub_elem)) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_sub_elem, config_keys); + + databases.reserve(config_keys.size()); + for (const auto & key : config_keys) + { + const auto database_name = config.getString(config_sub_elem + "." + key); + databases.insert(database_name); + } + } } /// Для вставки в контейнер. @@ -384,6 +404,17 @@ public: return it->second; } + + /// Проверить, имеет ли заданный клиент доступ к заданной базе данных. + bool isAllowedDatabase(const std::string & user_name, const std::string & database_name) const + { + auto it = cont.find(user_name); + if (it == cont.end()) + throw Exception("Unknown user " + user_name, ErrorCodes::UNKNOWN_USER); + + const auto & user = it->second; + return user.databases.empty() || user.databases.count(database_name); + } }; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index c91be012c8b..65bafe3db10 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -238,15 +238,31 @@ QuotaForIntervals & Context::getQuota() return *quota; } +void Context::checkDatabaseAccessRights(const std::string & database_name) const +{ + if (user.empty() || (database_name == "system")) + { + /// Безымянный пользователь, т.е. сервер, имеет доступ к всем БД. + /// Все пользователи имеют доступ к БД system. + return; + } + if (!shared->users.isAllowedDatabase(user, database_name)) + throw Exception("Access denied to database " + database_name, ErrorCodes::DATABASE_ACCESS_DENIED); +} + void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where) { Poco::ScopedLock lock(shared->mutex); + checkDatabaseAccessRights(from.first); + checkDatabaseAccessRights(where.first); shared->view_dependencies[from].insert(where); } void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where) { Poco::ScopedLock lock(shared->mutex); + checkDatabaseAccessRights(from.first); + checkDatabaseAccessRights(where.first); shared->view_dependencies[from].erase(where); } @@ -255,6 +271,7 @@ Dependencies Context::getDependencies(const String & database_name, const String Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? 
current_database : database_name; + checkDatabaseAccessRights(db); ViewDependencies::const_iterator iter = shared->view_dependencies.find(DatabaseAndTableName(db, table_name)); if (iter == shared->view_dependencies.end()) @@ -268,6 +285,7 @@ bool Context::isTableExist(const String & database_name, const String & table_na Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + checkDatabaseAccessRights(db); Databases::const_iterator it; return shared->databases.end() != (it = shared->databases.find(db)) @@ -279,6 +297,7 @@ bool Context::isDatabaseExist(const String & database_name) const { Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + checkDatabaseAccessRights(db); return shared->databases.end() != shared->databases.find(db); } @@ -288,6 +307,7 @@ void Context::assertTableExists(const String & database_name, const String & tab Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + checkDatabaseAccessRights(db); Databases::const_iterator it = shared->databases.find(db); if (shared->databases.end() == it) @@ -298,11 +318,13 @@ void Context::assertTableExists(const String & database_name, const String & tab } -void Context::assertTableDoesntExist(const String & database_name, const String & table_name) const +void Context::assertTableDoesntExist(const String & database_name, const String & table_name, bool check_database_access_rights) const { Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + if (check_database_access_rights) + checkDatabaseAccessRights(db); Databases::const_iterator it; if (shared->databases.end() != (it = shared->databases.find(db)) @@ -311,11 +333,13 @@ void Context::assertTableDoesntExist(const String & database_name, const String } -void Context::assertDatabaseExists(const String & database_name) const +void Context::assertDatabaseExists(const String & database_name, bool check_database_access_rights) const { Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + if (check_database_access_rights) + checkDatabaseAccessRights(db); if (shared->databases.end() == shared->databases.find(db)) throw Exception("Database " + db + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE); @@ -327,6 +351,7 @@ void Context::assertDatabaseDoesntExist(const String & database_name) const Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? current_database : database_name; + checkDatabaseAccessRights(db); if (shared->databases.end() != shared->databases.find(db)) throw Exception("Database " + db + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); @@ -409,6 +434,7 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta } String db = database_name.empty() ? current_database : database_name; + checkDatabaseAccessRights(db); Databases::const_iterator it = shared->databases.find(db); if (shared->databases.end() == it) @@ -453,9 +479,10 @@ void Context::addTable(const String & database_name, const String & table_name, Poco::ScopedLock lock(shared->mutex); String db = database_name.empty() ? 
current_database : database_name; + checkDatabaseAccessRights(db); - assertDatabaseExists(db); - assertTableDoesntExist(db, table_name); + assertDatabaseExists(db, false); + assertTableDoesntExist(db, table_name, false); shared->databases[db][table_name] = table; } diff --git a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp index c9652c73e7c..6131b9fd598 100644 --- a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -32,7 +32,12 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return "SELECT name FROM system.databases" + format_or_nothing; String database = query.from.empty() ? context.getCurrentDatabase() : query.from; - context.assertDatabaseExists(database); + + /** Параметр check_database_access_rights сбрасывается при обработке запроса SHOW TABLES для того, + * чтобы все клиенты могли видеть список всех БД и таблиц в них независимо от их прав доступа + * к этим БД. + */ + context.assertDatabaseExists(database, false); std::stringstream rewritten_query; rewritten_query << "SELECT name FROM system.tables WHERE database = " << mysqlxx::quote << database; diff --git a/dbms/src/Interpreters/tests/users.cpp b/dbms/src/Interpreters/tests/users.cpp new file mode 100644 index 00000000000..c0a80aaf5d6 --- /dev/null +++ b/dbms/src/Interpreters/tests/users.cpp @@ -0,0 +1,287 @@ +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +namespace fs = boost::filesystem; + +struct TestEntry +{ + std::string user_name; + std::string database_name; + bool is_allowed; +}; + +using TestEntries = std::vector; + +struct TestDescriptor +{ + const char * config_content; + TestEntries entries; +}; + +using TestSet = std::vector; + +/// Описание тестов. 
+ +TestSet test_set = +{ + { + "" + " " + " " + " " + " defaultdefault" + " " + " default" + " test" + " " + " " + " " + " defaultdefault" + " " + " " + " " + "", + + { + { "default", "default", true }, + { "default", "test", true }, + { "default", "stats", false }, + { "web", "default", true }, + { "web", "test", true }, + { "web", "stats", true }, + { "analytics", "default", false }, + { "analytics", "test", false }, + { "analytics", "stats", false } + } + }, + + { + "" + " " + " " + " " + " defaultdefault" + " " + " default" + " " + " " + " " + " defaultdefault" + " " + " " + " " + "", + + { + { "default", "default", true }, + { "default", "test", false }, + { "default", "stats", false }, + { "web", "default", true }, + { "web", "test", true }, + { "web", "stats", true }, + { "analytics", "default", false }, + { "analytics", "test", false }, + { "analytics", "stats", false } + } + }, + + { + "" + " " + " " + " " + " defaultdefault" + " " + " " + " " + " " + " defaultdefault" + " " + " " + " " + "", + + { + { "default", "default", true }, + { "default", "test", true }, + { "default", "stats", true }, + { "web", "default", true }, + { "web", "test", true }, + { "web", "stats", true }, + { "analytics", "default", false }, + { "analytics", "test", false }, + { "analytics", "stats", false } + } + }, + + { + "" + " " + " " + " " + " defaultdefault" + " " + " default" + " " + " " + " " + " defaultdefault" + " " + " test" + " " + " " + " " + " " + "", + + { + { "default", "default", true }, + { "default", "test", false }, + { "default", "stats", false }, + { "web", "default", false }, + { "web", "test", true }, + { "web", "stats", false }, + { "analytics", "default", false }, + { "analytics", "test", false }, + { "analytics", "stats", false } + } + } +}; + +std::string createTmpPath(const std::string & filename); +void createFile(const std::string & filename, const char * data); +void runOneTest(size_t test_num, const TestDescriptor & test_descriptor); +auto runTestSet(const TestSet & test_set); + +std::string createTmpPath(const std::string & filename) +{ + char pattern[] = "/tmp/fileXXXXXX"; + char * dir = mkdtemp(pattern); + if (dir == nullptr) + throw std::runtime_error("Could not create directory"); + + return std::string(dir) + "/" + filename; +} + +void createFile(const std::string & filename, const char * data) +{ + std::ofstream ofs(filename.c_str()); + if (!ofs.is_open()) + throw std::runtime_error("Could not open file " + filename); + ofs << data; +} + +void runOneTest(size_t test_num, const TestDescriptor & test_descriptor) +{ + const auto path_name = createTmpPath("users.xml"); + createFile(path_name, test_descriptor.config_content); + + ConfigurationPtr config; + + try + { + config = ConfigProcessor{}.loadConfig(path_name); + } + catch (const Poco::Exception & ex) + { + std::ostringstream os; + os << "Error: " << ex.what() << ": " << ex.displayText(); + throw std::runtime_error(os.str()); + } + + DB::Users users; + + try + { + users.loadFromConfig(*config); + } + catch (const Poco::Exception & ex) + { + std::ostringstream os; + os << "Error: " << ex.what() << ": " << ex.displayText(); + throw std::runtime_error(os.str()); + } + + for (const auto & entry : test_descriptor.entries) + { + bool res; + + try + { + res = users.isAllowedDatabase(entry.user_name, entry.database_name); + } + catch (const Poco::Exception &) + { + res = false; + } + + if (res != entry.is_allowed) + { + auto to_string = [](bool access){ return (access ? 
"'granted'" : "'denied'"); }; + std::ostringstream os; + os << "(user=" << entry.user_name << ", database=" << entry.database_name << "): "; + os << "Expected " << to_string(entry.is_allowed) << " but got " << to_string(res); + throw std::runtime_error(os.str()); + } + } + + fs::remove_all(fs::path(path_name).parent_path().string()); +} + +auto runTestSet(const TestSet & test_set) +{ + size_t test_num = 1; + size_t failure_count = 0; + + for (const auto & test_descriptor : test_set) + { + try + { + runOneTest(test_num, test_descriptor); + std::cout << "Test " << test_num << " passed\n"; + } + catch (const std::runtime_error & ex) + { + std::cerr << "Test " << test_num << " failed with reason: " << ex.what() << "\n"; + ++failure_count; + } + catch (...) + { + std::cerr << "Test " << test_num << " failed with unknown reason\n"; + ++failure_count; + } + + ++test_num; + } + + return std::make_tuple(test_set.size(), failure_count); +} + +} + +int main() +{ + size_t test_count; + size_t failure_count; + + std::tie(test_count, failure_count) = runTestSet(test_set); + + std::cout << (test_count - failure_count) << " test(s) passed out of " << test_count << "\n"; + + return (failure_count == 0) ? 0 : EXIT_FAILURE; +} From 3ab5226a566e0534a33575467a22dbf3b198f669 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 1 Oct 2015 18:14:39 +0300 Subject: [PATCH 09/39] dbms: Server: Fixed typo in comment. [#METR-17506] --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 65bafe3db10..a0be2361156 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -242,7 +242,7 @@ void Context::checkDatabaseAccessRights(const std::string & database_name) const { if (user.empty() || (database_name == "system")) { - /// Безымянный пользователь, т.е. сервер, имеет доступ к всем БД. + /// Безымянный пользователь, т.е. сервер, имеет доступ ко всем БД. /// Все пользователи имеют доступ к БД system. return; } From fba839199a8ea229e2e12455ea149bb970ed6f47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Oct 2015 21:02:56 +0300 Subject: [PATCH 10/39] dbms: limited queries to OLAP compatibility layer [#METR-18008]. 
--- dbms/src/Server/OLAPHTTPHandler.cpp | 1 + dbms/src/Server/OLAPHTTPHandler.h | 7 ++++--- dbms/src/Server/OLAPQueryConverter.cpp | 4 ++-- dbms/src/Server/Server.cpp | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dbms/src/Server/OLAPHTTPHandler.cpp b/dbms/src/Server/OLAPHTTPHandler.cpp index 88fc7210e9c..957d578a9da 100644 --- a/dbms/src/Server/OLAPHTTPHandler.cpp +++ b/dbms/src/Server/OLAPHTTPHandler.cpp @@ -51,6 +51,7 @@ void OLAPHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco: context.setGlobalContext(*server.global_context); context.setUser(user, password, request.clientAddress().host(), quota_key); + context.setSetting("profile", profile); context.setInterface(Context::Interface::OLAP_HTTP); context.setHTTPMethod(Context::HTTPMethod::POST); diff --git a/dbms/src/Server/OLAPHTTPHandler.h b/dbms/src/Server/OLAPHTTPHandler.h index c9d61faa152..7124c67eb1c 100644 --- a/dbms/src/Server/OLAPHTTPHandler.h +++ b/dbms/src/Server/OLAPHTTPHandler.h @@ -11,8 +11,9 @@ class OLAPHTTPHandler : public Poco::Net::HTTPRequestHandler { public: OLAPHTTPHandler(Server & server_) - : server(server_) - , log(&Logger::get("OLAPHTTPHandler")) + : server(server_), + log(&Logger::get("OLAPHTTPHandler")), + profile(Poco::Util::Application::instance().config().getString("olap_compatibility.profile")) { } @@ -20,8 +21,8 @@ public: private: Server & server; - Logger * log; + const String profile; void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response); }; diff --git a/dbms/src/Server/OLAPQueryConverter.cpp b/dbms/src/Server/OLAPQueryConverter.cpp index 9026f19199e..b2f7c4e1a95 100644 --- a/dbms/src/Server/OLAPQueryConverter.cpp +++ b/dbms/src/Server/OLAPQueryConverter.cpp @@ -10,8 +10,8 @@ namespace OLAP QueryConverter::QueryConverter(Poco::Util::AbstractConfiguration & config) { - table_for_single_counter = config.getString("olap_table_for_single_counter"); - table_for_all_counters = config.getString("olap_table_for_all_counters"); + table_for_single_counter = config.getString("olap_compatibility.table_for_single_counter"); + table_for_all_counters = config.getString("olap_compatibility.table_for_all_counters"); fillFormattedAttributeMap(); fillNumericAttributeMap(); diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 4c04f24b030..85752ce6295 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -613,7 +613,6 @@ int Server::main(const std::vector & args) const std::string listen_host = config().getString("listen_host", "::"); - bool use_olap_server = config().getBool("use_olap_http_server", false); Poco::Timespan keep_alive_timeout(config().getInt("keep_alive_timeout", 10), 0); Poco::ThreadPool server_pool(3, config().getInt("max_connections", 1024)); @@ -657,12 +656,13 @@ int Server::main(const std::vector & args) /// OLAP HTTP Poco::SharedPtr olap_http_server; + bool use_olap_server = config().has("olap_compatibility.port"); if (use_olap_server) { olap_parser.reset(new OLAP::QueryParser()); olap_converter.reset(new OLAP::QueryConverter(config())); - Poco::Net::ServerSocket olap_http_socket(Poco::Net::SocketAddress(listen_host, config().getInt("olap_http_port"))); + Poco::Net::ServerSocket olap_http_socket(Poco::Net::SocketAddress(listen_host, config().getInt("olap_compatibility.port"))); olap_http_socket.setReceiveTimeout(settings.receive_timeout); olap_http_socket.setSendTimeout(settings.send_timeout); olap_http_server = new Poco::Net::HTTPServer( From 
b8c866245727904c416793acb9df343a963177ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Oct 2015 21:33:46 +0300 Subject: [PATCH 11/39] dbms: preparation [#METR-18167]. --- .../MergeTree/ReplicatedMergeTreeAddress.h | 64 +++++++++++++++++++ .../ReplicatedMergeTreeQuorumEntry.h | 6 ++ .../ReplicatedMergeTreeRestartingThread.cpp | 18 +++--- .../Storages/StorageReplicatedMergeTree.cpp | 11 +--- 4 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeAddress.h diff --git a/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeAddress.h b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeAddress.h new file mode 100644 index 00000000000..931f87cf14c --- /dev/null +++ b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeAddress.h @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +/// Позволяет узнать, куда отправлять запросы, чтобы попасть на реплику. + +struct ReplicatedMergeTreeAddress +{ + String host; + UInt16 replication_port; + UInt16 queries_port; + String database; + String table; + + ReplicatedMergeTreeAddress() {} + ReplicatedMergeTreeAddress(const String & str) + { + fromString(str); + } + + void writeText(WriteBuffer & out) const + { + out + << "host: " << escape << host << '\n' + << "port: " << replication_port << '\n' + << "tcp_port: " << queries_port << '\n' + << "database: " << escape << database << '\n' + << "table: " << escape << table << '\n'; + } + + void readText(ReadBuffer & in) + { + in + >> "host: " >> escape >> host >> "\n" + >> "port: " >> replication_port >> "\n" + >> "tcp_port: " >> queries_port >> "\n" + >> "database: " >> escape >> database >> "\n" + >> "table: " >> escape >> table >> "\n"; + } + + String toString() const + { + String res; + { + WriteBufferFromString out(res); + writeText(out); + } + return res; + } + + void fromString(const String & str) + { + ReadBufferFromString in(str); + readText(in); + } +}; + +} diff --git a/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h index f416fbeda81..ac55f7a29e8 100644 --- a/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h +++ b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h @@ -22,6 +22,12 @@ struct ReplicatedMergeTreeQuorumEntry size_t required_number_of_replicas; std::set replicas; + ReplicatedMergeTreeQuorumEntry() {} + ReplicatedMergeTreeQuorumEntry(const String & str) + { + fromString(str); + } + void writeText(WriteBuffer & out) const { out << "version: 1\n" diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 52f497014c0..183139a5864 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -224,14 +225,13 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() auto host_port = storage.context.getInterserverIOAddress(); auto zookeeper = storage.getZooKeeper(); - std::string address; - { - WriteBufferFromString address_buf(address); - address_buf - << "host: " << host_port.first << '\n' - << "port: " << host_port.second << '\n' - << "tcp_port: " << storage.context.getTCPPort() << '\n'; - } + /// Как другие реплики могут обращаться к данной. 
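    /// The address is written in text form into the replica's "host" node via writeText, roughly:
    ///   host: example-host
    ///   port: 9009
    ///   tcp_port: 9000
    ///   database: test
    ///   table: visits
    /// (host name, ports and table above are only illustrative; the database and table fields are new,
    /// older entries contained just host, port and tcp_port).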
+ ReplicatedMergeTreeAddress address; + address.host = host_port.first; + address.replication_port = host_port.second; + address.queries_port = storage.context.getTCPPort(); + address.database = storage.database_name; + address.table = storage.table_name; String is_active_path = storage.replica_path + "/is_active"; @@ -258,7 +258,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() zkutil::Ops ops; ops.push_back(new zkutil::Op::Create(is_active_path, active_node_identifier, zookeeper->getDefaultACL(), zkutil::CreateMode::Ephemeral)); - ops.push_back(new zkutil::Op::SetData(storage.replica_path + "/host", address, -1)); + ops.push_back(new zkutil::Op::SetData(storage.replica_path + "/host", address.toString(), -1)); try { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index a852cde7274..bdd98f9e740 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -2415,15 +2416,9 @@ void StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (!to_detached) table_lock = lockStructure(true); - String host; - int port; + ReplicatedMergeTreeAddress address(zookeeper->get(replica_path + "/host")); - String host_port_str = zookeeper->get(replica_path + "/host"); - ReadBufferFromString buf(host_port_str); - buf >> "host: " >> host >> "\n" - >> "port: " >> port >> "\n"; - - MergeTreeData::MutableDataPartPtr part = fetcher.fetchPart(part_name, replica_path, host, port, to_detached); + MergeTreeData::MutableDataPartPtr part = fetcher.fetchPart(part_name, replica_path, address.host, address.replication_port, to_detached); if (!to_detached) { From 51cdac107be36a7929cadebc8b7013240d48992e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Oct 2015 23:45:43 +0300 Subject: [PATCH 12/39] dbms: tests: removed executable flag on few files [#METR-2944]. 
--- dbms/tests/queries/0_stateless/00071_insert_fewer_columns.sql | 0 dbms/tests/queries/0_stateless/00072_in_types.sql | 0 .../0_stateless/00073_merge_sorting_empty_array_joined.sql | 0 .../00074_replicated_attach_race_condition_zookeeper.sql | 0 .../0_stateless/00075_formatting_negate_of_negative_literal.sql | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dbms/tests/queries/0_stateless/00071_insert_fewer_columns.sql mode change 100755 => 100644 dbms/tests/queries/0_stateless/00072_in_types.sql mode change 100755 => 100644 dbms/tests/queries/0_stateless/00073_merge_sorting_empty_array_joined.sql mode change 100755 => 100644 dbms/tests/queries/0_stateless/00074_replicated_attach_race_condition_zookeeper.sql mode change 100755 => 100644 dbms/tests/queries/0_stateless/00075_formatting_negate_of_negative_literal.sql diff --git a/dbms/tests/queries/0_stateless/00071_insert_fewer_columns.sql b/dbms/tests/queries/0_stateless/00071_insert_fewer_columns.sql old mode 100755 new mode 100644 diff --git a/dbms/tests/queries/0_stateless/00072_in_types.sql b/dbms/tests/queries/0_stateless/00072_in_types.sql old mode 100755 new mode 100644 diff --git a/dbms/tests/queries/0_stateless/00073_merge_sorting_empty_array_joined.sql b/dbms/tests/queries/0_stateless/00073_merge_sorting_empty_array_joined.sql old mode 100755 new mode 100644 diff --git a/dbms/tests/queries/0_stateless/00074_replicated_attach_race_condition_zookeeper.sql b/dbms/tests/queries/0_stateless/00074_replicated_attach_race_condition_zookeeper.sql old mode 100755 new mode 100644 diff --git a/dbms/tests/queries/0_stateless/00075_formatting_negate_of_negative_literal.sql b/dbms/tests/queries/0_stateless/00075_formatting_negate_of_negative_literal.sql old mode 100755 new mode 100644 From f356545bbe7e5171487ad6ef86989f15d1773f94 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Oct 2015 00:28:19 +0300 Subject: [PATCH 13/39] dbms: allowed DROP/DETACH partition on non-leader [#METR-17573]. 
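Previously ALTER ... DROP/DETACH PARTITION threw NOT_LEADER when issued on a non-leader replica. Now the non-leader finds the current leader through <zookeeper_path>/leader_election, reads the leader's address (including the database and table name added in the earlier "preparation" commit) from <zookeeper_path>/replicas/<leader>/host, rewrites the ALTER query to point at the leader's table and forwards it over the native protocol with RemoteBlockInputStream. As the comment in the code notes, this only works while the leader accepts the 'default' user without a password. The added test 00236_replicated_drop_on_non_leader_zookeeper checks that a DROP PARTITION issued against the second replica takes effect on both. For example, assuming attach_r1 happens to be the leader replica, this statement run against attach_r2

    ALTER TABLE test.attach_r2 DROP PARTITION 201402

is forwarded to the leader roughly as

    ALTER TABLE test.attach_r1 DROP PARTITION 201402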
--- dbms/include/DB/Core/ErrorCodes.h | 3 +- dbms/include/DB/Parsers/ASTAlterQuery.h | 8 ++-- dbms/include/DB/Storages/IStorage.h | 4 +- dbms/include/DB/Storages/StorageMergeTree.h | 4 +- .../DB/Storages/StorageReplicatedMergeTree.h | 4 +- .../Interpreters/InterpreterAlterQuery.cpp | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 4 +- .../Storages/StorageReplicatedMergeTree.cpp | 44 ++++++++++++++++--- ...ted_drop_on_non_leader_zookeeper.reference | 5 +++ ...eplicated_drop_on_non_leader_zookeeper.sql | 18 ++++++++ 10 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.sql diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 5d11d9dd558..ab0e1cd7c6a 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -252,7 +252,6 @@ namespace ErrorCodes CORRUPTED_DATA = 246, INCORRECT_MARK = 247, INVALID_PARTITION_NAME = 248, - NOT_LEADER = 249, NOT_ENOUGH_BLOCK_NUMBERS = 250, NO_SUCH_REPLICA = 251, TOO_MUCH_PARTS = 252, @@ -280,7 +279,7 @@ namespace ErrorCodes AIO_READ_ERROR = 274, AIO_WRITE_ERROR = 275, INDEX_NOT_USED = 277, - LEADERSHIP_LOST = 278, + LEADERSHIP_CHANGED = 278, ALL_CONNECTION_TRIES_FAILED = 279, NO_AVAILABLE_DATA = 280, DICTIONARY_IS_EMPTY = 281, diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index d0e88080071..d89e4c32bac 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -63,9 +63,9 @@ public: void clone(Parameters & p) const { p = *this; - p.col_decl = col_decl->clone(); - p.column = column->clone(); - p.partition = partition->clone(); + if (col_decl) p.col_decl = col_decl->clone(); + if (column) p.column = column->clone(); + if (partition) p.partition = partition->clone(); } }; typedef std::vector ParameterContainer; @@ -95,9 +95,7 @@ public: { ASTAlterQuery * res = new ASTAlterQuery(*this); for (ParameterContainer::size_type i = 0; i < parameters.size(); ++i) - { parameters[i].clone(res->parameters[i]); - } return res; } diff --git a/dbms/include/DB/Storages/IStorage.h b/dbms/include/DB/Storages/IStorage.h index 05fddedf760..1404fb8986a 100644 --- a/dbms/include/DB/Storages/IStorage.h +++ b/dbms/include/DB/Storages/IStorage.h @@ -208,14 +208,14 @@ public: /** Выполнить запрос (DROP|DETACH) PARTITION. */ - virtual void dropPartition(const Field & partition, bool detach, bool unreplicated, const Settings & settings) + virtual void dropPartition(ASTPtr query, const Field & partition, bool detach, bool unreplicated, const Settings & settings) { throw Exception("Method dropPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Выполнить запрос ATTACH [UNREPLICATED] (PART|PARTITION). 
*/ - virtual void attachPartition(const Field & partition, bool unreplicated, bool part, const Settings & settings) + virtual void attachPartition(ASTPtr query, const Field & partition, bool unreplicated, bool part, const Settings & settings) { throw Exception("Method attachPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/include/DB/Storages/StorageMergeTree.h b/dbms/include/DB/Storages/StorageMergeTree.h index da39ebf0a1e..4d83a21fbb7 100644 --- a/dbms/include/DB/Storages/StorageMergeTree.h +++ b/dbms/include/DB/Storages/StorageMergeTree.h @@ -89,8 +89,8 @@ public: return merge(settings.min_bytes_to_use_direct_io, true); } - void dropPartition(const Field & partition, bool detach, bool unreplicated, const Settings & settings) override; - void attachPartition(const Field & partition, bool unreplicated, bool part, const Settings & settings) override; + void dropPartition(ASTPtr query, const Field & partition, bool detach, bool unreplicated, const Settings & settings) override; + void attachPartition(ASTPtr query, const Field & partition, bool unreplicated, bool part, const Settings & settings) override; void freezePartition(const Field & partition, const Settings & settings) override; void drop() override; diff --git a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h index 60a21abd9f4..02e70818f58 100644 --- a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h +++ b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h @@ -127,8 +127,8 @@ public: void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override; - void dropPartition(const Field & partition, bool detach, bool unreplicated, const Settings & settings) override; - void attachPartition(const Field & partition, bool unreplicated, bool part, const Settings & settings) override; + void dropPartition(ASTPtr query, const Field & partition, bool detach, bool unreplicated, const Settings & settings) override; + void attachPartition(ASTPtr query, const Field & partition, bool unreplicated, bool part, const Settings & settings) override; void fetchPartition(const Field & partition, const String & from, const Settings & settings) override; void freezePartition(const Field & partition, const Settings & settings) override; diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 27275435ac8..1f8cbcfcc36 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -43,11 +43,11 @@ BlockIO InterpreterAlterQuery::execute() switch (command.type) { case PartitionCommand::DROP_PARTITION: - table->dropPartition(command.partition, command.detach, command.unreplicated, context.getSettingsRef()); + table->dropPartition(query_ptr, command.partition, command.detach, command.unreplicated, context.getSettingsRef()); break; case PartitionCommand::ATTACH_PARTITION: - table->attachPartition(command.partition, command.unreplicated, command.part, context.getSettingsRef()); + table->attachPartition(query_ptr, command.partition, command.unreplicated, command.part, context.getSettingsRef()); break; case PartitionCommand::FETCH_PARTITION: diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index c4cc5712f98..09aa2a03b16 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -280,7 +280,7 
@@ bool StorageMergeTree::canMergeParts(const MergeTreeData::DataPartPtr & left, co } -void StorageMergeTree::dropPartition(const Field & partition, bool detach, bool unreplicated, const Settings & settings) +void StorageMergeTree::dropPartition(ASTPtr query, const Field & partition, bool detach, bool unreplicated, const Settings & settings) { if (unreplicated) throw Exception("UNREPLICATED option for DROP has meaning only for ReplicatedMergeTree", ErrorCodes::BAD_ARGUMENTS); @@ -314,7 +314,7 @@ void StorageMergeTree::dropPartition(const Field & partition, bool detach, bool } -void StorageMergeTree::attachPartition(const Field & field, bool unreplicated, bool part, const Settings & settings) +void StorageMergeTree::attachPartition(ASTPtr query, const Field & field, bool unreplicated, bool part, const Settings & settings) { if (unreplicated) throw Exception("UNREPLICATED option for ATTACH has meaning only for ReplicatedMergeTree", ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index bdd98f9e740..815a7f9d72f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -2804,21 +2807,52 @@ void StorageReplicatedMergeTree::dropUnreplicatedPartition(const Field & partiti } -void StorageReplicatedMergeTree::dropPartition(const Field & field, bool detach, bool unreplicated, const Settings & settings) +void StorageReplicatedMergeTree::dropPartition(ASTPtr query, const Field & field, bool detach, bool unreplicated, const Settings & settings) { if (unreplicated) { dropUnreplicatedPartition(field, detach, settings); - return; } auto zookeeper = getZooKeeper(); String month_name = MergeTreeData::getMonthName(field); - /// TODO: Делать запрос в лидера по TCP. if (!is_leader_node) - throw Exception(String(detach ? "DETACH" : "DROP") + " PARTITION can only be done on leader replica.", ErrorCodes::NOT_LEADER); + { + /// Проксируем запрос в лидера. + + auto live_replicas = zookeeper->getChildren(zookeeper_path + "/leader_election"); + if (live_replicas.empty()) + throw Exception("No active replicas", ErrorCodes::NO_ACTIVE_REPLICAS); + + const auto leader = zookeeper->get(zookeeper_path + "/leader_election/" + live_replicas.front()); + + if (leader == replica_name) + throw Exception("Leader was suddenly changed or logical error.", ErrorCodes::LEADERSHIP_CHANGED); + + ReplicatedMergeTreeAddress leader_address(zookeeper->get(zookeeper_path + "/replicas/" + leader + "/host")); + + auto new_query = query->clone(); + auto & alter = typeid_cast(*new_query); + + alter.database = leader_address.database; + alter.table = leader_address.table; + + /// NOTE Работает только если есть доступ от пользователя default без пароля. Можно исправить с помощью добавления параметра в конфиг сервера. + + Connection connection( + leader_address.host, + leader_address.queries_port, + leader_address.database, + "", "", "ClickHouse replica"); + + RemoteBlockInputStream stream(connection, formattedAST(new_query), &settings); + NullBlockOutputStream output; + + copyData(stream, output); + return; + } /** Пропустим один номер в block_numbers для удаляемого месяца, и будем удалять только куски до этого номера. * Это запретит мерджи удаляемых кусков с новыми вставляемыми данными. 
@@ -2871,7 +2905,7 @@ void StorageReplicatedMergeTree::dropPartition(const Field & field, bool detach, } -void StorageReplicatedMergeTree::attachPartition(const Field & field, bool unreplicated, bool attach_part, const Settings & settings) +void StorageReplicatedMergeTree::attachPartition(ASTPtr query, const Field & field, bool unreplicated, bool attach_part, const Settings & settings) { auto zookeeper = getZooKeeper(); String partition; diff --git a/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.reference b/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.reference new file mode 100644 index 00000000000..0361ab7194b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.reference @@ -0,0 +1,5 @@ +2014-01-01 +2014-02-01 +2014-03-01 +2014-01-01 +2014-03-01 diff --git a/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.sql b/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.sql new file mode 100644 index 00000000000..7d65e403b83 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper.sql @@ -0,0 +1,18 @@ +SET replication_alter_partitions_sync = 2; + +DROP TABLE IF EXISTS test.attach_r1; +DROP TABLE IF EXISTS test.attach_r2; + +CREATE TABLE test.attach_r1 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/attach', 'r1', d, d, 8192); +CREATE TABLE test.attach_r2 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/attach', 'r2', d, d, 8192); + +INSERT INTO test.attach_r1 VALUES ('2014-01-01'), ('2014-02-01'), ('2014-03-01'); + +SELECT d FROM test.attach_r1 ORDER BY d; + +ALTER TABLE test.attach_r2 DROP PARTITION 201402; + +SELECT d FROM test.attach_r1 ORDER BY d; + +DROP TABLE test.attach_r1; +DROP TABLE test.attach_r2; From 733324645f5129be190a94bea568ea03323a3b57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 06:17:36 +0300 Subject: [PATCH 14/39] dbms: aggregation by arrays: preparation [#METR-13168]. 
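Groundwork for using composite values (arrays, strings, fixed strings, tuples) as aggregation keys: IColumn gets two new methods, serializeValueIntoArena and deserializeAndInsertFromArena, which write a single value into an Arena as a contiguous byte sequence and reconstruct it back. For ColumnArray the serialized form is the array size followed by the serialized elements. The unused ColumnNested is removed and DataTypeNested is trimmed accordingly. A rough usage sketch of the new interface (variable names are illustrative, not from this patch):

    Arena pool;
    const char * begin = nullptr;
    /// Append the value of row `row` to the arena; `ref` points at the serialized bytes.
    StringRef ref = column.serializeValueIntoArena(row, pool, begin);
    /// Later, rebuild the same value at the end of another column of the same type.
    const char * next = target_column.deserializeAndInsertFromArena(ref.data);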
--- .../DB/Columns/ColumnAggregateFunction.h | 10 + dbms/include/DB/Columns/ColumnArray.h | 27 ++ dbms/include/DB/Columns/ColumnConst.h | 10 + dbms/include/DB/Columns/ColumnFixedString.h | 15 + dbms/include/DB/Columns/ColumnNested.h | 314 ------------------ dbms/include/DB/Columns/ColumnString.h | 28 ++ dbms/include/DB/Columns/ColumnTuple.h | 38 ++- dbms/include/DB/Columns/ColumnVector.h | 13 + dbms/include/DB/Columns/IColumn.h | 16 + dbms/include/DB/Columns/IColumnDummy.h | 10 + dbms/include/DB/DataTypes/DataTypeNested.h | 71 +--- .../DB/Storages/MergeTree/MergeTreeReader.h | 1 - .../BlockInputStreamFromRowInputStream.cpp | 5 +- dbms/src/DataTypes/DataTypeNested.cpp | 182 +--------- .../MergeTree/MergeTreeDataWriter.cpp | 1 - dbms/src/Storages/StorageLog.cpp | 1 - dbms/src/Storages/StorageStripeLog.cpp | 1 - dbms/src/Storages/StorageTinyLog.cpp | 1 - 18 files changed, 176 insertions(+), 568 deletions(-) delete mode 100644 dbms/include/DB/Columns/ColumnNested.h diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h index 68bf2970a4f..e9e0522a115 100644 --- a/dbms/include/DB/Columns/ColumnAggregateFunction.h +++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h @@ -182,6 +182,16 @@ public: throw Exception("Method insertDefault is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + throw Exception("Method serializeValueIntoArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + throw Exception("Method deserializeAndInsertFromArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + size_t byteSize() const override { return getData().size() * sizeof(getData()[0]); diff --git a/dbms/include/DB/Columns/ColumnArray.h b/dbms/include/DB/Columns/ColumnArray.h index 170dafce004..e3420d0b22d 100644 --- a/dbms/include/DB/Columns/ColumnArray.h +++ b/dbms/include/DB/Columns/ColumnArray.h @@ -119,6 +119,33 @@ public: getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + elems); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + size_t array_size = sizeAt(n); + size_t offset = offsetAt(n); + + char * pos = arena.allocContinue(sizeof(array_size), begin); + memcpy(pos, &array_size, sizeof(array_size)); + + size_t values_size = 0; + for (size_t i = 0; i < array_size; ++i) + values_size += data->serializeValueIntoArena(offset + i, arena, begin).size; + + return StringRef(begin, sizeof(array_size) + values_size); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + size_t array_size = *reinterpret_cast(pos); + pos += sizeof(array_size); + + for (size_t i = 0; i < array_size; ++i) + pos = data->deserializeAndInsertFromArena(pos); + + getOffsets().push_back((getOffsets().size() == 0 ? 
0 : getOffsets().back()) + array_size); + return pos; + } + ColumnPtr cut(size_t start, size_t length) const override; void insert(const Field & x) override diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index fad47e76f96..0a9a87b665b 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -89,6 +89,16 @@ public: void insertDefault() override { ++s; } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + throw Exception("Method serializeValueIntoArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + throw Exception("Method deserializeAndInsertFromArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + ColumnPtr filter(const Filter & filt) const override { if (s != filt.size()) diff --git a/dbms/include/DB/Columns/ColumnFixedString.h b/dbms/include/DB/Columns/ColumnFixedString.h index c2ecba68768..20d695d4646 100644 --- a/dbms/include/DB/Columns/ColumnFixedString.h +++ b/dbms/include/DB/Columns/ColumnFixedString.h @@ -111,6 +111,21 @@ public: chars.resize_fill(chars.size() + n); } + StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const override + { + auto pos = arena.allocContinue(n, begin); + memcpy(pos, &chars[n * index], n); + return StringRef(pos, n); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + size_t old_size = chars.size(); + chars.resize(old_size + n); + memcpy(&chars[old_size], pos, n); + return pos + n; + } + int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int nan_direction_hint) const override { const ColumnFixedString & rhs = static_cast(rhs_); diff --git a/dbms/include/DB/Columns/ColumnNested.h b/dbms/include/DB/Columns/ColumnNested.h deleted file mode 100644 index 401ac076f3f..00000000000 --- a/dbms/include/DB/Columns/ColumnNested.h +++ /dev/null @@ -1,314 +0,0 @@ -#pragma once - -#include // memcpy - -#include - -#include -#include -#include - -#include -#include - - -namespace DB -{ - -using Poco::SharedPtr; - -/** Cтолбeц значений типа вложенная таблица. - * В памяти это выглядит, как столбцы вложенных типов одинковой длины, равной сумме размеров всех массивов с общим именем, - * и как общий для всех столбцов массив смещений, который позволяет достать каждый элемент. - * - * Не предназначен для возвращения результа в запросах SELECT. Предполагается, что для SELECT'а будут отдаваться - * столбцы вида ColumnArray, ссылающиеся на один массив Offset'ов и соответствующий массив с данными. - * - * Используется для сериализации вложенной таблицы. - */ -class ColumnNested final : public IColumn -{ -public: - /** По индексу i находится смещение до начала i + 1 -го элемента. 
*/ - typedef ColumnVector ColumnOffsets_t; - - /** Создать пустой столбец вложенных таблиц, с типом значений, как в столбце nested_column */ - explicit ColumnNested(Columns nested_columns, ColumnPtr offsets_column = nullptr) - : data(nested_columns), offsets(offsets_column) - { - if (!offsets_column) - { - offsets = new ColumnOffsets_t; - } - else - { - if (!typeid_cast(&*offsets_column)) - throw Exception("offsets_column must be a ColumnVector", ErrorCodes::ILLEGAL_COLUMN); - } - } - - std::string getName() const override - { - std::string res; - { - WriteBufferFromString out(res); - - for (Columns::const_iterator it = data.begin(); it != data.end(); ++it) - { - if (it != data.begin()) - writeCString(", ", out); - writeString((*it)->getName(), out); - } - } - return "ColumnNested(" + res + ")"; - } - - ColumnPtr cloneEmpty() const override - { - Columns res(data.size()); - for (size_t i = 0; i < data.size(); ++i) - res[i] = data[i]->cloneEmpty(); - return new ColumnNested(res); - } - - size_t size() const override - { - return getOffsets().size(); - } - - Field operator[](size_t n) const override - { - throw Exception("Method operator[] is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - void get(size_t n, Field & res) const override - { - throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - StringRef getDataAt(size_t n) const override - { - throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - void insertData(const char * pos, size_t length) override - { - throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - ColumnPtr cut(size_t start, size_t length) const override - { - if (length == 0) - return new ColumnNested(data); - - if (start + length > getOffsets().size()) - throw Exception("Parameter out of bound in ColumnNested::cut() method.", - ErrorCodes::PARAMETER_OUT_OF_BOUND); - - size_t nested_offset = offsetAt(start); - size_t nested_length = getOffsets()[start + length - 1] - nested_offset; - - ColumnNested * res_ = new ColumnNested(data); - ColumnPtr res = res_; - - for (size_t i = 0; i < data.size(); ++i) - res_->data[i] = data[i]->cut(nested_offset, nested_length); - - Offsets_t & res_offsets = res_->getOffsets(); - - if (start == 0) - { - res_offsets.assign(getOffsets().begin(), getOffsets().begin() + length); - } - else - { - res_offsets.resize(length); - - for (size_t i = 0; i < length; ++i) - res_offsets[i] = getOffsets()[start + i] - nested_offset; - } - - return res; - } - - void insert(const Field & x) override - { - throw Exception("Method insert is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - void insertFrom(const IColumn & src_, size_t n) override - { - const ColumnNested & src = static_cast(src_); - - if (data.size() != src.getData().size()) - throw Exception("Number of columns in nested tables do not match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - size_t size = src.sizeAt(n); - size_t offset = src.offsetAt(n); - - for (size_t i = 0; i < data.size(); ++i) - { - if (data[i]->getName() != src.getData()[i]->getName()) - throw Exception("Types of columns in nested tables do not match.", ErrorCodes::TYPE_MISMATCH); - - for (size_t j = 0; j < size; ++j) - data[i]->insertFrom(*src.getData()[i], offset + j); - } - - getOffsets().push_back((getOffsets().size() == 0 ? 
0 : getOffsets().back()) + size); - } - - void insertDefault() override - { - for (size_t i = 0; i < data.size(); ++i) - data[i]->insertDefault(); - getOffsets().push_back(getOffsets().size() == 0 ? 1 : (getOffsets().back() + 1)); - } - - ColumnPtr filter(const Filter & filt) const override - { - size_t size = getOffsets().size(); - if (size != filt.size()) - throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (size == 0) - return new ColumnNested(data); - - /// Не слишком оптимально. Можно сделать специализацию для массивов известных типов. - Filter nested_filt(getOffsets().back()); - for (size_t i = 0; i < size; ++i) - { - if (filt[i]) - memset(&nested_filt[offsetAt(i)], 1, sizeAt(i)); - else - memset(&nested_filt[offsetAt(i)], 0, sizeAt(i)); - } - - ColumnNested * res_ = new ColumnNested(data); - ColumnPtr res = res_; - for (size_t i = 0; i < data.size(); ++i) - res_->data[i] = data[i]->filter(nested_filt); - - Offsets_t & res_offsets = res_->getOffsets(); - res_offsets.reserve(size); - - size_t current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - if (filt[i]) - { - current_offset += sizeAt(i); - res_offsets.push_back(current_offset); - } - } - - return res; - } - - ColumnPtr replicate(const Offsets_t & offsets) const override - { - throw Exception("Replication of ColumnNested is not implemented.", ErrorCodes::NOT_IMPLEMENTED); - } - - ColumnPtr permute(const Permutation & perm, size_t limit) const override - { - size_t size = getOffsets().size(); - if (size != perm.size()) - throw Exception("Size of permutation doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (limit == 0) - limit = size; - else - limit = std::min(size, limit); - - if (perm.size() < limit) - throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (limit == 0) - return new ColumnNested(data); - - Permutation nested_perm(getOffsets().back()); - - Columns cloned_columns(data.size()); - for (size_t i = 0; i < data.size(); ++i) - cloned_columns[i] = data[i]->cloneEmpty(); - - ColumnNested * res_ = new ColumnNested(cloned_columns); - ColumnPtr res = res_; - - Offsets_t & res_offsets = res_->getOffsets(); - res_offsets.resize(limit); - size_t current_offset = 0; - - for (size_t i = 0; i < limit; ++i) - { - for (size_t j = 0; j < sizeAt(perm[i]); ++j) - nested_perm[current_offset + j] = offsetAt(perm[i]) + j; - current_offset += sizeAt(perm[i]); - res_offsets[i] = current_offset; - } - - if (current_offset != 0) - for (size_t i = 0; i < data.size(); ++i) - res_->data[i] = data[i]->permute(nested_perm, current_offset); - - return res; - } - - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override - { - throw Exception("Method compareAt is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED); - } - - void getPermutation(bool reverse, size_t limit, Permutation & res) const override - { - throw Exception("Method getPermutation is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED); - } - - void reserve(size_t n) override - { - getOffsets().reserve(n); - for (Columns::iterator it = data.begin(); it != data.end(); ++it) - (*it)->reserve(n); - } - - size_t byteSize() const override - { - size_t size = getOffsets().size() * sizeof(getOffsets()[0]); - for (Columns::const_iterator it = data.begin(); it != data.end(); ++it) - size += (*it)->byteSize(); - return size; - } - - void getExtremes(Field & min, Field & max) 
const override - { - throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Более эффективные методы манипуляции */ - Columns & getData() { return data; } - const Columns & getData() const { return data; } - - Offsets_t & ALWAYS_INLINE getOffsets() - { - return static_cast(*offsets.get()).getData(); - } - - const Offsets_t & ALWAYS_INLINE getOffsets() const - { - return static_cast(*offsets.get()).getData(); - } - - ColumnPtr & getOffsetsColumn() { return offsets; } - const ColumnPtr & getOffsetsColumn() const { return offsets; } - -private: - Columns data; - ColumnPtr offsets; - - size_t ALWAYS_INLINE offsetAt(size_t i) const { return i == 0 ? 0 : getOffsets()[i - 1]; } - size_t ALWAYS_INLINE sizeAt(size_t i) const { return i == 0 ? getOffsets()[0] : (getOffsets()[i] - getOffsets()[i - 1]); } -}; - - -} diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index 805a4b096ec..1bd6def5779 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -115,6 +115,34 @@ public: offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + size_t string_size = sizeAt(n); + size_t offset = offsetAt(n); + + StringRef res; + res.size = sizeof(string_size) + string_size; + char * pos = arena.allocContinue(res.size, begin); + memcpy(pos, &string_size, sizeof(string_size)); + memcpy(pos + sizeof(string_size), &chars[offset], string_size); + res.data = pos; + + return res; + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + size_t string_size = *reinterpret_cast(pos); + pos += sizeof(string_size); + + size_t old_size = chars.size(); + chars.resize(old_size + string_size); + memcpy(&chars[old_size], pos, string_size); + + offsets.push_back((offsets.size() == 0 ? 
0 : offsets.back()) + string_size); + return pos + string_size; + } + ColumnPtr cut(size_t start, size_t length) const override { if (length == 0) diff --git a/dbms/include/DB/Columns/ColumnTuple.h b/dbms/include/DB/Columns/ColumnTuple.h index 4a85f036180..bfbbc4c4f59 100644 --- a/dbms/include/DB/Columns/ColumnTuple.h +++ b/dbms/include/DB/Columns/ColumnTuple.h @@ -42,8 +42,8 @@ public: { Array res; - for (Columns::const_iterator it = columns.begin(); it != columns.end(); ++it) - res.push_back((**it)[n]); + for (const auto & column : columns) + res.push_back((*column)[n]); return res; } @@ -93,8 +93,26 @@ public: void insertDefault() override { - for (Columns::iterator it = columns.begin(); it != columns.end(); ++it) - (*it)->insertDefault(); + for (auto & column : columns) + column->insertDefault(); + } + + + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + size_t values_size = 0; + for (auto & column : columns) + values_size += column->serializeValueIntoArena(n, arena, begin).size; + + return StringRef(begin, values_size); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + for (auto & column : columns) + pos = column->deserializeAndInsertFromArena(pos); + + return pos; } @@ -155,8 +173,8 @@ public: Less(const Columns & columns) { - for (Columns::const_iterator it = columns.begin(); it != columns.end(); ++it) - plain_columns.push_back(&**it); + for (const auto & column : columns) + plain_columns.push_back(column.get()); } bool operator() (size_t a, size_t b) const @@ -201,15 +219,15 @@ public: void reserve(size_t n) override { - for (Columns::iterator it = columns.begin(); it != columns.end(); ++it) - (*it)->reserve(n); + for (auto & column : columns) + column->reserve(n); } size_t byteSize() const override { size_t res = 0; - for (Columns::const_iterator it = columns.begin(); it != columns.end(); ++it) - res += (*it)->byteSize(); + for (const auto & column : columns) + res += column->byteSize(); return res; } diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index 078a474be2d..03a5443751f 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -150,6 +150,19 @@ public: data.push_back(T()); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + auto pos = arena.allocContinue(sizeof(T), begin); + memcpy(pos, &data[n], sizeof(T)); + return StringRef(pos, sizeof(T)); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + data.push_back(*reinterpret_cast(pos)); + return pos + sizeof(T); + } + size_t byteSize() const override { return data.size() * sizeof(data[0]); diff --git a/dbms/include/DB/Columns/IColumn.h b/dbms/include/DB/Columns/IColumn.h index 236b6fd49af..c534b0fc7b0 100644 --- a/dbms/include/DB/Columns/IColumn.h +++ b/dbms/include/DB/Columns/IColumn.h @@ -22,6 +22,8 @@ typedef std::vector Columns; typedef std::vector ColumnPlainPtrs; typedef std::vector ConstColumnPlainPtrs; +class Arena; + /** Интерфейс для хранения столбцов значений в оперативке. */ @@ -134,6 +136,20 @@ public: */ virtual void insertDefault() = 0; + /** Сериализовать значение, расположив его в непрерывном куске памяти в Arena. + * Значение можно будет потом прочитать обратно. Используется для агрегации. 
+ * Метод похож на getDataAt, но может работать для тех случаев, + * когда значение не однозначно соответствует какому-то уже существующему непрерывному куску памяти + * - например, для массива строк, чтобы получить однозначное соответствие, надо укладывать строки вместе с их размерами. + * Параметр begin - см. метод Arena::allocContinue. + */ + virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const = 0; + + /** Десериализовать значение, которое было сериализовано с помощью serializeValueIntoArena. + * Вернуть указатель на позицию после прочитанных данных. + */ + virtual const char * deserializeAndInsertFromArena(const char * pos) = 0; + /** Соединить столбец с одним или несколькими другими. * Используется при склейке маленьких блоков. */ diff --git a/dbms/include/DB/Columns/IColumnDummy.h b/dbms/include/DB/Columns/IColumnDummy.h index d9c559f68f8..8b0a9a4a0ab 100644 --- a/dbms/include/DB/Columns/IColumnDummy.h +++ b/dbms/include/DB/Columns/IColumnDummy.h @@ -29,6 +29,16 @@ public: StringRef getDataAt(size_t n) const override { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void insertData(const char * pos, size_t length) override { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override + { + throw Exception("Method serializeValueIntoArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + const char * deserializeAndInsertFromArena(const char * pos) override + { + throw Exception("Method deserializeAndInsertFromArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void getExtremes(Field & min, Field & max) const override { throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/include/DB/DataTypes/DataTypeNested.h b/dbms/include/DB/DataTypes/DataTypeNested.h index eea1bb67137..44120433089 100644 --- a/dbms/include/DB/DataTypes/DataTypeNested.h +++ b/dbms/include/DB/DataTypes/DataTypeNested.h @@ -1,84 +1,39 @@ #pragma once -#include +#include #include + namespace DB { -using Poco::SharedPtr; - - -class DataTypeNested final : public IDataType +/** Хранит набор пар (имя, тип) для вложенной структуры данных. + * Используется только при создании таблицы. Во всех остальных случаях не используется, так как раскрывается в набор отдельных столбцов с типами. + */ +class DataTypeNested final : public IDataTypeDummy { private: /// Имена и типы вложенных массивов. NamesAndTypesListPtr nested; - /// Тип смещений. - DataTypePtr offsets; public: DataTypeNested(NamesAndTypesListPtr nested_); std::string getName() const override; + DataTypePtr clone() const override + { + return new DataTypeNested(nested); + } + + const NamesAndTypesListPtr & getNestedTypesList() const { return nested; } + static std::string concatenateNestedName(const std::string & nested_table_name, const std::string & nested_field_name); /// Возвращает префикс имени до первой точки '.'. Или имя без изменений, если точки нет. static std::string extractNestedTableName(const std::string & nested_name); /// Возвращает суффикс имени после первой точки справа '.'. Или имя без изменений, если точки нет. 
static std::string extractNestedColumnName(const std::string & nested_name); - DataTypePtr clone() const override - { - return new DataTypeNested(nested); - } - - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - - void serializeText(const Field & field, WriteBuffer & ostr) const override; - void deserializeText(Field & field, ReadBuffer & istr) const override; - - void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const override; - void deserializeTextEscaped(Field & field, ReadBuffer & istr) const override; - - void serializeTextQuoted(const Field & field, WriteBuffer & ostr) const override; - void deserializeTextQuoted(Field & field, ReadBuffer & istr) const override; - - void serializeTextJSON(const Field & field, WriteBuffer & ostr) const override; - - /** Потоковая сериализация массивов устроена по-особенному: - * - записываются/читаются элементы, уложенные подряд, без размеров массивов; - * - размеры записываются/читаются в отдельный столбец, - * и о записи/чтении размеров должна позаботиться вызывающая сторона. - * Это нужно, так как несколько массивов имеют общие размеры. - */ - - /** Записать только значения, без размеров. Вызывающая сторона также должна куда-нибудь записать смещения. */ - void serializeBinary(const IColumn & column, WriteBuffer & ostr, size_t offset = 0, size_t limit = 0) const override; - - /** Прочитать только значения, без размеров. - * При этом, в column уже заранее должны быть считаны все размеры. - */ - void deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - - /** Записать размеры. */ - void serializeOffsets(const IColumn & column, WriteBuffer & ostr, size_t offset = 0, size_t limit = 0) const; - - /** Прочитать размеры. Вызывайте этот метод перед чтением значений. 
*/ - void deserializeOffsets(IColumn & column, ReadBuffer & istr, size_t limit) const; - - ColumnPtr createColumn() const override; - ColumnPtr createConstColumn(size_t size, const Field & field) const override; - - Field getDefault() const override - { - throw Exception("Method getDefault is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - const NamesAndTypesListPtr & getNestedTypesList() const { return nested; } - const DataTypePtr & getOffsetsType() const { return offsets; } - /// Создает новый список в котором колонки типа Nested заменены на несколько вида имя_колонки.имя_вложенной_ячейки static NamesAndTypesListPtr expandNestedColumns(const NamesAndTypesList & names_and_types); }; diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h b/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h index bfd35850edd..beb7786b522 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h @@ -11,7 +11,6 @@ #include #include #include -#include #include diff --git a/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp b/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp index af46a124cd0..625f2e8919e 100644 --- a/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -53,9 +52,9 @@ Block BlockInputStreamFromRowInputStream::readImpl() e.addMessage("(at row " + toString(total_rows + 1) + ")"); throw; } - + res.optimizeNestedArraysOffsets(); - + return res; } diff --git a/dbms/src/DataTypes/DataTypeNested.cpp b/dbms/src/DataTypes/DataTypeNested.cpp index e6d022f0141..cf2f70e1064 100644 --- a/dbms/src/DataTypes/DataTypeNested.cpp +++ b/dbms/src/DataTypes/DataTypeNested.cpp @@ -1,25 +1,18 @@ -#include -#include -#include - #include #include #include -#include - -#include - #include +#include namespace DB { -DataTypeNested::DataTypeNested(NamesAndTypesListPtr nested_) : nested(nested_) +DataTypeNested::DataTypeNested(NamesAndTypesListPtr nested_) + : nested(nested_) { - offsets = new DataTypeFromFieldType::Type; } @@ -71,174 +64,6 @@ std::string DataTypeNested::getName() const } -void DataTypeNested::serializeBinary(const Field & field, WriteBuffer & ostr) const -{ - throw Exception("Method serializeBinary(const Field &, WriteBuffer &) is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - -void DataTypeNested::deserializeBinary(Field & field, ReadBuffer & istr) const -{ - throw Exception("Method deserializeBinary(Field &, ReadBuffer &) is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - -void DataTypeNested::serializeBinary(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const -{ - const ColumnNested & column_nested = typeid_cast(column); - const ColumnNested::Offsets_t & offsets = column_nested.getOffsets(); - - if (offset > offsets.size()) - return; - - /** offset - с какого массива писать. - * limit - сколько массивов максимум записать, или 0, если писать всё, что есть. - * end - до какого массива заканчивается записываемый кусок. - * - * nested_offset - с какого элемента внутренностей писать. - * nested_limit - сколько элементов внутренностей писать, или 0, если писать всё, что есть. - */ - - size_t end = std::min(offset + limit, offsets.size()); - - size_t nested_offset = offset ? offsets[offset - 1] : 0; - size_t nested_limit = limit - ? 
offsets[end - 1] - nested_offset - : 0; - - if (limit == 0 || nested_limit) - { - NamesAndTypesList::const_iterator it = nested->begin(); - for (size_t i = 0; i < nested->size(); ++i, ++it) - it->type->serializeBinary(*column_nested.getData()[i], ostr, nested_offset, nested_limit); - } -} - - -void DataTypeNested::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const -{ - ColumnNested & column_nested = typeid_cast(column); - ColumnNested::Offsets_t & offsets = column_nested.getOffsets(); - - /// Должно быть считано согласованное с offsets количество значений. - size_t last_offset = (offsets.empty() ? 0 : offsets.back()); - if (last_offset < column_nested.size()) - throw Exception("Nested column longer than last offset", ErrorCodes::LOGICAL_ERROR); - size_t nested_limit = (offsets.empty() ? 0 : offsets.back()) - column_nested.size(); - - NamesAndTypesList::const_iterator it = nested->begin(); - for (size_t i = 0; i < nested->size(); ++i, ++it) - { - it->type->deserializeBinary(*column_nested.getData()[i], istr, nested_limit, 0); - if (column_nested.getData()[i]->size() != last_offset) - throw Exception("Cannot read all nested column values", ErrorCodes::CANNOT_READ_ALL_DATA); - } -} - - -void DataTypeNested::serializeOffsets(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const -{ - const ColumnNested & column_nested = typeid_cast(column); - const ColumnNested::Offsets_t & offsets = column_nested.getOffsets(); - size_t size = offsets.size(); - - if (!size) - return; - - size_t end = limit && (offset + limit < size) - ? offset + limit - : size; - - if (offset == 0) - { - writeIntBinary(offsets[0], ostr); - ++offset; - } - - for (size_t i = offset; i < end; ++i) - writeIntBinary(offsets[i] - offsets[i - 1], ostr); -} - - -void DataTypeNested::deserializeOffsets(IColumn & column, ReadBuffer & istr, size_t limit) const -{ - ColumnNested & column_nested = typeid_cast(column); - ColumnNested::Offsets_t & offsets = column_nested.getOffsets(); - size_t initial_size = offsets.size(); - offsets.resize(initial_size + limit); - - size_t i = initial_size; - ColumnNested::Offset_t current_offset = initial_size ? 
offsets[initial_size - 1] : 0; - while (i < initial_size + limit && !istr.eof()) - { - ColumnNested::Offset_t current_size = 0; - readIntBinary(current_size, istr); - current_offset += current_size; - offsets[i] = current_offset; - ++i; - } - - offsets.resize(i); -} - - -void DataTypeNested::serializeText(const Field & field, WriteBuffer & ostr) const -{ - throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - -void DataTypeNested::deserializeText(Field & field, ReadBuffer & istr) const -{ - throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - -void DataTypeNested::serializeTextEscaped(const Field & field, WriteBuffer & ostr) const -{ - serializeText(field, ostr); -} - - -void DataTypeNested::deserializeTextEscaped(Field & field, ReadBuffer & istr) const -{ - deserializeText(field, istr); -} - - -void DataTypeNested::serializeTextQuoted(const Field & field, WriteBuffer & ostr) const -{ - serializeText(field, ostr); -} - - -void DataTypeNested::deserializeTextQuoted(Field & field, ReadBuffer & istr) const -{ - deserializeText(field, istr); -} - - -void DataTypeNested::serializeTextJSON(const Field & field, WriteBuffer & ostr) const -{ - throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - - -ColumnPtr DataTypeNested::createColumn() const -{ - Columns columns; - columns.reserve(nested->size()); - for (NamesAndTypesList::const_iterator it = nested->begin(); it != nested->end(); ++it) - columns.push_back(it->type->createColumn()); - return new ColumnNested(columns); -} - - -ColumnPtr DataTypeNested::createConstColumn(size_t size, const Field & field) const -{ - throw Exception("Method createConstColumn is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); -} - NamesAndTypesListPtr DataTypeNested::expandNestedColumns(const NamesAndTypesList & names_and_types) { NamesAndTypesListPtr columns = new NamesAndTypesList; @@ -258,4 +83,5 @@ NamesAndTypesListPtr DataTypeNested::expandNestedColumns(const NamesAndTypesList } return columns; } + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 87e0952c6b9..081179c4ba4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index 4ef5aac3971..40c49c4b6bc 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -19,7 +19,6 @@ #include #include -#include #include diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index aecfae7fb57..47394a70d55 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 3b6a9441d34..260273e3f1c 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include From 4309ccd0d63ce65759899a7be1676761944ff51f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 06:20:03 +0300 Subject: [PATCH 15/39] dbms: addition to prev. revision [#METR-13168]. 
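The serializeValueIntoArena / deserializeAndInsertFromArena pair introduced above relies on a position-decodable layout: fixed-size values are copied verbatim, strings are written as their length followed by their bytes, and arrays are written as their element count followed by each element serialized in turn. The following standalone sketch mirrors that layout with a plain byte buffer in place of DB::Arena; the buffer, helper names and use of std::string/std::vector are assumptions for illustration only, not the actual IColumn interface.

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

/// Standalone illustration; not the ClickHouse API.
/// Append raw bytes to a growable byte buffer standing in for DB::Arena.
static void appendBytes(std::vector<char> & buf, const void * data, size_t size)
{
    const char * p = static_cast<const char *>(data);
    buf.insert(buf.end(), p, p + size);
}

/// One string value: its size first, then the bytes (cf. ColumnString).
static void serializeString(std::vector<char> & buf, const std::string & s)
{
    size_t size = s.size();
    appendBytes(buf, &size, sizeof(size));
    appendBytes(buf, s.data(), size);
}

/// An array of strings: element count first, then each element (cf. ColumnArray).
static void serializeStringArray(std::vector<char> & buf, const std::vector<std::string> & arr)
{
    size_t count = arr.size();
    appendBytes(buf, &count, sizeof(count));
    for (const auto & s : arr)
        serializeString(buf, s);
}

/// Read back from a position and return the pointer past the consumed bytes,
/// the same contract as deserializeAndInsertFromArena.
static const char * deserializeString(const char * pos, std::string & out)
{
    size_t size;
    memcpy(&size, pos, sizeof(size));
    pos += sizeof(size);
    out.assign(pos, size);
    return pos + size;
}

static const char * deserializeStringArray(const char * pos, std::vector<std::string> & out)
{
    size_t count;
    memcpy(&count, pos, sizeof(count));
    pos += sizeof(count);
    out.resize(count);
    for (auto & s : out)
        pos = deserializeString(pos, s);
    return pos;
}

int main()
{
    std::vector<char> buf;
    serializeStringArray(buf, {"ab", "", "xyz"});

    std::vector<std::string> restored;
    const char * end = deserializeStringArray(buf.data(), restored);

    std::cout << restored.size() << " elements, " << (end - buf.data()) << " bytes consumed\n";
}

Because every value carries its own size, a reader needs nothing but the starting position, which is exactly what lets GROUP BY keys made of several variable-length values be compared and decoded unambiguously.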
--- dbms/include/DB/Columns/ColumnArray.h | 1 + dbms/include/DB/Columns/ColumnFixedString.h | 1 + dbms/include/DB/Columns/ColumnString.h | 1 + dbms/include/DB/Columns/ColumnVector.h | 1 + 4 files changed, 4 insertions(+) diff --git a/dbms/include/DB/Columns/ColumnArray.h b/dbms/include/DB/Columns/ColumnArray.h index e3420d0b22d..dd855b76c4c 100644 --- a/dbms/include/DB/Columns/ColumnArray.h +++ b/dbms/include/DB/Columns/ColumnArray.h @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/dbms/include/DB/Columns/ColumnFixedString.h b/dbms/include/DB/Columns/ColumnFixedString.h index 20d695d4646..cd6c502974d 100644 --- a/dbms/include/DB/Columns/ColumnFixedString.h +++ b/dbms/include/DB/Columns/ColumnFixedString.h @@ -3,6 +3,7 @@ #include // memcpy #include +#include #include diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index 1bd6def5779..635bdcfbc02 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index 03a5443751f..6f403ad665f 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -4,6 +4,7 @@ #include #include +#include #include From 058e5e8de76411e7d3ecd5000ef46309d18fcfb7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 06:41:00 +0300 Subject: [PATCH 16/39] dbms: addition to prev. revision [#METR-13168]. --- dbms/src/DataTypes/tests/data_type_nested.cpp | 176 ------------------ 1 file changed, 176 deletions(-) delete mode 100644 dbms/src/DataTypes/tests/data_type_nested.cpp diff --git a/dbms/src/DataTypes/tests/data_type_nested.cpp b/dbms/src/DataTypes/tests/data_type_nested.cpp deleted file mode 100644 index 222af5b57d1..00000000000 --- a/dbms/src/DataTypes/tests/data_type_nested.cpp +++ /dev/null @@ -1,176 +0,0 @@ -#include - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -struct Nested { - DB::PODArray uint8; - DB::PODArray uint64; - std::vector string; -}; - -const size_t n = 4; -const size_t sizes[n] = {3, 1, 4, 2}; - -int main(int argc, char ** argv) -{ - try - { - Poco::Stopwatch stopwatch; - - /// Nested(uint8 UInt8, uint64 UInt64, string String) - Nested nested[n]; - for (size_t i = 0; i < n; ++i) - { - for (size_t j = 0; j < sizes[i]; ++j) - { - nested[i].uint8.push_back(i * 4 + j); - nested[i].uint64.push_back(1ULL << (63 - nested[i].uint8.back())); - nested[i].string.push_back(""); - { - DB::WriteBufferFromString wb(nested[i].string.back()); - DB::writeIntText(nested[i].uint8.back(), wb); - DB::writeString("SpAcE", wb); - DB::writeIntText(nested[i].uint64.back(), wb); - } - } - } - DB::NamesAndTypesListPtr types = new DB::NamesAndTypesList; - types->push_back(DB::NameAndTypePair("uint8", new DB::DataTypeUInt8)); - types->push_back(DB::NameAndTypePair("uint64", new DB::DataTypeUInt64)); - types->push_back(DB::NameAndTypePair("string", new DB::DataTypeString)); - - DB::DataTypeNested data_type(types); - - { - DB::ColumnPtr column_p = data_type.createColumn(); - DB::ColumnNested * column = typeid_cast(&*column_p); - DB::Columns & data = column->getData(); - DB::ColumnNested::Offsets_t & offsets = column->getOffsets(); - - data.resize(3); - data[0] = new DB::ColumnUInt8; - data[1] = new DB::ColumnUInt64; - data[2] = new DB::ColumnString; - - for (size_t i = 0; i < 
n; ++i) - { - for (size_t j = 0; j < sizes[i]; ++j) - { - data[0]->insert(DB::Field(UInt64(nested[i].uint8[j]))); - data[1]->insert(DB::Field(nested[i].uint64[j])); - data[2]->insert(DB::Field(nested[i].string[j].data(), nested[i].string[j].size())); - } - offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + sizes[i]); - } - - stopwatch.restart(); - { - std::ofstream ostr("test.size"); - DB::WriteBufferFromOStream out_buf(ostr); - data_type.serializeOffsets(*column, out_buf); - } - { - std::ofstream ostr("test"); - DB::WriteBufferFromOStream out_buf(ostr); - data_type.serializeBinary(*column, out_buf); - } - stopwatch.stop(); - - std::cout << "Writing, elapsed: " << static_cast(stopwatch.elapsed()) / 1000000 << std::endl; - } - - { - DB::ColumnPtr column_p = data_type.createColumn(); - DB::ColumnNested * column = typeid_cast(&*column_p); - - std::ifstream istr("test"); - DB::ReadBufferFromIStream in_buf(istr); - - stopwatch.restart(); - { - std::ifstream istr("test.size"); - DB::ReadBufferFromIStream in_buf(istr); - data_type.deserializeOffsets(*column, in_buf, n); - } - { - std::ifstream istr("test"); - DB::ReadBufferFromIStream in_buf(istr); - data_type.deserializeBinary(*column, in_buf, n, 0); - } - stopwatch.stop(); - - std::cout << "Reading, elapsed: " << static_cast(stopwatch.elapsed()) / 1000000 << std::endl; - - std::cout << std::endl; - - DB::Columns & data = column->getData(); - DB::ColumnNested::Offsets_t & offsets = column->getOffsets(); - - Nested res; - res.uint8.assign(typeid_cast(*data[0]).getData()); - res.uint64.assign(typeid_cast(*data[1]).getData()); - DB::ColumnString & res_string = typeid_cast(*data[2]); - - std::cout << "offsets: ["; - for (size_t i = 0; i < offsets.size(); ++i) - { - if (i) std::cout << ", "; - std::cout << offsets[i]; - } - std::cout << "]\n" << std::endl; - - for (size_t i = 0; i < n; ++i) - { - size_t sh = i ? offsets[i - 1] : 0; - - std::cout << "["; - for (size_t j = 0; j < sizes[i]; ++j) - { - if (j) std::cout << ", "; - std::cout << int(res.uint8[sh + j]); - } - std::cout << "]\n"; - - std::cout << "["; - for (size_t j = 0; j < sizes[i]; ++j) - { - if (j) std::cout << ", "; - std::cout << res.uint64[sh + j]; - } - std::cout << "]\n"; - - std::cout << "["; - for (size_t j = 0; j < sizes[i]; ++j) - { - if (j) std::cout << ", "; - std::cout << '"' << res_string.getDataAt(sh + j).toString() << '"'; - } - std::cout << "]\n"; - - std::cout << std::endl; - } - } - } - catch (const DB::Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} From 6546fa7dbd55ada7ea59043f2bbaf6962c985906 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 06:44:58 +0300 Subject: [PATCH 17/39] dbms: fixed build [#METR-17573]. 
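The deleted test above exercised the old ColumnNested serialization; after this series a Nested structure survives mainly as a naming convention: DataTypeNested keeps its (name, type) pairs so that expandNestedColumns can replace a column such as n Nested(a T1, b T2) with the flat columns n.a Array(T1) and n.b Array(T2), and the concatenate/extract helpers split such names at a dot. A minimal sketch of that naming scheme follows; the std::pair-based types and these free functions are illustrative stand-ins, not the real NamesAndTypesList API, and the exact dot-splitting rules should be checked against DataTypeNested itself.

#include <iostream>
#include <string>
#include <vector>

/// Standalone illustration; not the ClickHouse API.
/// Join "table" and "field" into the flattened column name "table.field".
static std::string concatenateNestedName(const std::string & table, const std::string & field)
{
    return table + "." + field;
}

/// Prefix before the first dot, or the whole name if there is no dot.
static std::string extractNestedTableName(const std::string & name)
{
    size_t pos = name.find('.');
    return pos == std::string::npos ? name : name.substr(0, pos);
}

/// Suffix after the last dot, or the whole name if there is no dot.
static std::string extractNestedColumnName(const std::string & name)
{
    size_t pos = name.rfind('.');
    return pos == std::string::npos ? name : name.substr(pos + 1);
}

int main()
{
    /// Expanding a hypothetical `visits Nested(id UInt32, url String)` by name only:
    std::vector<std::pair<std::string, std::string>> nested = {{"id", "UInt32"}, {"url", "String"}};
    std::vector<std::pair<std::string, std::string>> expanded;
    for (const auto & [field, type] : nested)
        expanded.emplace_back(concatenateNestedName("visits", field), "Array(" + type + ")");

    for (const auto & [name, type] : expanded)
        std::cout << name << " " << type
                  << "  (table part: " << extractNestedTableName(name)
                  << ", column part: " << extractNestedColumnName(name) << ")\n";
}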
--- dbms/include/DB/Core/ErrorCodes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index ab0e1cd7c6a..8b5fb98e94f 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -279,7 +279,7 @@ namespace ErrorCodes AIO_READ_ERROR = 274, AIO_WRITE_ERROR = 275, INDEX_NOT_USED = 277, - LEADERSHIP_CHANGED = 278, + LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, NO_AVAILABLE_DATA = 280, DICTIONARY_IS_EMPTY = 281, @@ -293,6 +293,7 @@ namespace ErrorCodes REPLICA_IS_NOT_IN_QUORUM = 289, LIMIT_EXCEEDED = 290, DATABASE_ACCESS_DENIED = 291, + LEADERSHIP_CHANGED = 292, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, From 804216f678ce21b32ee155cfad6fb73b2709ad7e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 09:10:48 +0300 Subject: [PATCH 18/39] dbms: added full support of GROUP BY arrays [#METR-13168]. --- dbms/include/DB/Common/Arena.h | 7 +- .../DB/Interpreters/AggregationCommon.h | 15 ++++ dbms/include/DB/Interpreters/Aggregator.h | 78 ++++++++++++++++++- dbms/src/Interpreters/Aggregator.cpp | 37 ++++++++- 4 files changed, 125 insertions(+), 12 deletions(-) diff --git a/dbms/include/DB/Common/Arena.h b/dbms/include/DB/Common/Arena.h index 28d40fed635..0d934be7272 100644 --- a/dbms/include/DB/Common/Arena.h +++ b/dbms/include/DB/Common/Arena.h @@ -127,16 +127,15 @@ public: */ char * allocContinue(size_t size, char const *& begin) { - if (unlikely(head->pos + size > head->end)) + while (unlikely(head->pos + size > head->end)) { char * prev_end = head->pos; addChunk(size); if (begin) - { begin = insert(begin, prev_end - begin); - return allocContinue(size, begin); - } + else + break; } char * res = head->pos; diff --git a/dbms/include/DB/Interpreters/AggregationCommon.h b/dbms/include/DB/Interpreters/AggregationCommon.h index c88a4897391..6a299e236a1 100644 --- a/dbms/include/DB/Interpreters/AggregationCommon.h +++ b/dbms/include/DB/Interpreters/AggregationCommon.h @@ -182,4 +182,19 @@ static inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( } +/** Сериализовать ключи в непрерывный кусок памяти. + */ +static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( + size_t i, size_t keys_size, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool) +{ + const char * begin = nullptr; + + size_t sum_size = 0; + for (size_t j = 0; j < keys_size; ++j) + sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size; + + return {begin, sum_size}; +} + + } diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 4c6bbd53c20..3d14a25276b 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -319,7 +319,7 @@ struct AggregationMethodKeysFixed }; -/// Для остальных случаев. Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.) +/// Агрегирует по конкатенации ключей. (При этом, строки, содержащие нули посередине, могут склеиться.) template struct AggregationMethodConcat { @@ -392,6 +392,70 @@ struct AggregationMethodConcat }; +/** Агрегирует по конкатенации сериализованных значений ключей. + * Похож на AggregationMethodConcat, но подходит, например, для массивов строк или нескольких массивов. + * Сериализованное значение отличается тем, что позволяет однозначно его десериализовать, имея только позицию, с которой оно начинается. 
+ * То есть, например, для строк, оно содержит сначала сериализованную длину строки, а потом байты. + * Поэтому, при агрегации по нескольким строкам, неоднозначностей не возникает. + */ +template +struct AggregationMethodSerialized +{ + typedef TData Data; + typedef typename Data::key_type Key; + typedef typename Data::mapped_type Mapped; + typedef typename Data::iterator iterator; + typedef typename Data::const_iterator const_iterator; + + Data data; + + AggregationMethodSerialized() {} + + template + AggregationMethodSerialized(const Other & other) : data(other.data) {} + + struct State + { + void init(ConstColumnPlainPtrs & key_columns) + { + } + + Key getKey( + const ConstColumnPlainPtrs & key_columns, + size_t keys_size, + size_t i, + const Sizes & key_sizes, + StringRefs & keys, + Arena & pool) const + { + return serializeKeysToPoolContiguous(i, keys_size, key_columns, keys, pool); + } + }; + + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + + static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) + { + } + + static void onExistingKey(const Key & key, StringRefs & keys, Arena & pool) + { + pool.rollback(key.size); + } + + /// Если ключ уже был, то он удаляется из пула (затирается), и сравнить с ним следующий ключ уже нельзя. + static const bool no_consecutive_keys_optimization = true; + + static void insertKeyIntoColumns(const typename Data::value_type & value, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes) + { + auto pos = value.first.data; + for (size_t i = 0; i < keys_size; ++i) + pos = key_columns[i]->deserializeAndInsertFromArena(pos); + } +}; + + /// Для остальных случаев. Агрегирует по 128 битному хэшу от ключа. (При этом, строки, содержащие нули посередине, могут склеиться.) template struct AggregationMethodHashed @@ -492,6 +556,7 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> keys256; std::unique_ptr> hashed; std::unique_ptr> concat; + std::unique_ptr> serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; @@ -501,6 +566,7 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> keys256_two_level; std::unique_ptr> hashed_two_level; std::unique_ptr> concat_two_level; + std::unique_ptr> serialized_two_level; /// В этом и подобных макросах, вариант without_key не учитывается. 
#define APPLY_FOR_AGGREGATED_VARIANTS(M) \ @@ -514,6 +580,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys256, false) \ M(hashed, false) \ M(concat, false) \ + M(serialized, false) \ M(key32_two_level, true) \ M(key64_two_level, true) \ M(key_string_two_level, true) \ @@ -521,7 +588,8 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128_two_level, true) \ M(keys256_two_level, true) \ M(hashed_two_level, true) \ - M(concat_two_level, true) + M(concat_two_level, true) \ + M(serialized_two_level, true) \ enum class Type { @@ -636,7 +704,8 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128) \ M(keys256) \ M(hashed) \ - M(concat) + M(concat) \ + M(serialized) \ #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ M(key8) \ @@ -667,7 +736,8 @@ struct AggregatedDataVariants : private boost::noncopyable M(keys128_two_level) \ M(keys256_two_level) \ M(hashed_two_level) \ - M(concat_two_level) + M(concat_two_level) \ + M(serialized_two_level) }; typedef SharedPtr AggregatedDataVariantsPtr; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 56ea0ea896e..fd9da886112 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -323,16 +324,33 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu bool all_fixed = true; size_t keys_bytes = 0; + + size_t num_array_keys = 0; + bool has_arrays_of_non_fixed_elems = false; + bool all_non_array_keys_are_fixed = true; + key_sizes.resize(keys_size); for (size_t j = 0; j < keys_size; ++j) { - if (!key_columns[j]->isFixed()) + if (key_columns[j]->isFixed()) + { + key_sizes[j] = key_columns[j]->sizeOfField(); + keys_bytes += key_sizes[j]; + } + else { all_fixed = false; - break; + + if (const ColumnArray * arr = typeid_cast(key_columns[j])) + { + ++num_array_keys; + + if (!arr->getData().isFixed()) + has_arrays_of_non_fixed_elems = true; + } + else + all_non_array_keys_are_fixed = false; } - key_sizes[j] = key_columns[j]->sizeOfField(); - keys_bytes += key_sizes[j]; } /// Если ключей нет @@ -367,6 +385,13 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu if (keys_size == 1 && typeid_cast(key_columns[0])) return AggregatedDataVariants::Type::key_fixed_string; + /** Если есть массивы. + * Если есть не более одного массива из элементов фиксированной длины, и остальные ключи фиксированной длины, + * то всё ещё можно использовать метод concat. Иначе - serialized. + */ + if (num_array_keys > 1 || has_arrays_of_non_fixed_elems || (num_array_keys == 1 && !all_non_array_keys_are_fixed)) + return AggregatedDataVariants::Type::serialized; + /// Иначе будем агрегировать по конкатенации ключей. 
return AggregatedDataVariants::Type::concat; @@ -1387,6 +1412,8 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va mergeSingleLevelDataImplhashed)::element_type>(non_empty_data); else if (res->type == AggregatedDataVariants::Type::concat) mergeSingleLevelDataImplconcat)::element_type>(non_empty_data); + else if (res->type == AggregatedDataVariants::Type::serialized) + mergeSingleLevelDataImplserialized)::element_type>(non_empty_data); else if (res->type == AggregatedDataVariants::Type::key32_two_level) mergeTwoLevelDataImplkey32_two_level)::element_type>(non_empty_data, thread_pool.get()); else if (res->type == AggregatedDataVariants::Type::key64_two_level) @@ -1403,6 +1430,8 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va mergeTwoLevelDataImplhashed_two_level)::element_type>(non_empty_data, thread_pool.get()); else if (res->type == AggregatedDataVariants::Type::concat_two_level) mergeTwoLevelDataImplconcat_two_level)::element_type>(non_empty_data, thread_pool.get()); + else if (res->type == AggregatedDataVariants::Type::serialized_two_level) + mergeTwoLevelDataImplserialized_two_level)::element_type>(non_empty_data, thread_pool.get()); else if (res->type != AggregatedDataVariants::Type::without_key) throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); From 49852e67fb3d6a0e9e4de1e74d612ee12e534548 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 09:25:33 +0300 Subject: [PATCH 19/39] dbms: added tests [#METR-13168]. --- .../00237_group_by_arrays.reference | 57 +++++++++++++++++++ .../0_stateless/00237_group_by_arrays.sql | 6 ++ 2 files changed, 63 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00237_group_by_arrays.reference create mode 100644 dbms/tests/queries/0_stateless/00237_group_by_arrays.sql diff --git a/dbms/tests/queries/0_stateless/00237_group_by_arrays.reference b/dbms/tests/queries/0_stateless/00237_group_by_arrays.reference new file mode 100644 index 00000000000..f68f88f88c6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00237_group_by_arrays.reference @@ -0,0 +1,57 @@ +[] [1] 1 +[1] [] 1 +[] [0,1,2,3,4,5,6,7,8,9] 1 +[0] [0,1,2,3,4,5,6,7,8] 1 +[0,1] [0,1,2,3,4,5,6,7] 1 +[0,1,2] [0,1,2,3,4,5,6] 1 +[0,1,2,3] [0,1,2,3,4,5] 1 +[0,1,2,3,4] [0,1,2,3,4] 1 +[0,1,2,3,4,5] [0,1,2,3] 1 +[0,1,2,3,4,5,6] [0,1,2] 1 +[0,1,2,3,4,5,6,7] [0,1] 1 +[0,1,2,3,4,5,6,7,8] [0] 1 +[0,1,2,3,4,5,6,7,8,9] [] 1 +[] [0,1,2,3,4,5,6,7,8,9] 1 +['0'] [0,1,2,3,4,5,6,7,8] 1 +['0','1'] [0,1,2,3,4,5,6,7] 1 +['0','1','2'] [0,1,2,3,4,5,6] 1 +['0','1','2','3'] [0,1,2,3,4,5] 1 +['0','1','2','3','4'] [0,1,2,3,4] 1 +['0','1','2','3','4','5'] [0,1,2,3] 1 +['0','1','2','3','4','5','6'] [0,1,2] 1 +['0','1','2','3','4','5','6','7'] [0,1] 1 +['0','1','2','3','4','5','6','7','8'] [0] 1 +['0','1','2','3','4','5','6','7','8','9'] [] 1 +[] ['0','1','2','3','4','5','6','7','8','9'] 1 +['0'] ['0','1','2','3','4','5','6','7','8'] 1 +['0','1'] ['0','1','2','3','4','5','6','7'] 1 +['0','1','2'] ['0','1','2','3','4','5','6'] 1 +['0','1','2','3'] ['0','1','2','3','4','5'] 1 +['0','1','2','3','4'] ['0','1','2','3','4'] 1 +['0','1','2','3','4','5'] ['0','1','2','3'] 1 +['0','1','2','3','4','5','6'] ['0','1','2'] 1 +['0','1','2','3','4','5','6','7'] ['0','1'] 1 +['0','1','2','3','4','5','6','7','8'] ['0'] 1 +['0','1','2','3','4','5','6','7','8','9'] [] 1 +[] [] 1 +['0'] [[0,1,2,3,4,5,6,7,8]] 1 +['0','1'] [[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7]] 1 +['0','1','2'] 
[[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6]] 1 +['0','1','2','3'] [[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5]] 1 +['0','1','2','3','4'] [[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4]] 1 +['0','1','2','3','4','5'] [[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3]] 1 +['0','1','2','3','4','5','6'] [[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2]] 1 +['0','1','2','3','4','5','6','7'] [[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1]] 1 +['0','1','2','3','4','5','6','7','8'] [[0],[0],[0],[0],[0],[0],[0],[0],[0]] 1 +['0','1','2','3','4','5','6','7','8','9'] [[],[],[],[],[],[],[],[],[],[]] 1 +[] [] 3 +['0'] [[0,1,2,3,4,5,6,7,8]] 3 +['0','1'] [[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7]] 3 +['0','1','2'] [[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6]] 3 +['0','1','2','3'] [[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5]] 3 +['0','1','2','3','4'] [[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4]] 3 +['0','1','2','3','4','5'] [[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3]] 3 +['0','1','2','3','4','5','6'] [[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2]] 3 +['0','1','2','3','4','5','6','7'] [[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1]] 2 +['0','1','2','3','4','5','6','7','8'] [[0],[0],[0],[0],[0],[0],[0],[0],[0]] 2 +['0','1','2','3','4','5','6','7','8','9'] [[],[],[],[],[],[],[],[],[],[]] 2 diff --git a/dbms/tests/queries/0_stateless/00237_group_by_arrays.sql b/dbms/tests/queries/0_stateless/00237_group_by_arrays.sql new file mode 100644 index 00000000000..faf81ebc5e3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00237_group_by_arrays.sql @@ -0,0 +1,6 @@ +SELECT arr1, arr2, count() AS c FROM (SELECT emptyArrayUInt8() AS arr1, [1] AS arr2 UNION ALL SELECT [1], emptyArrayUInt8()) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; +SELECT arr1, arr2, count() AS c FROM (SELECT range(number) AS arr1, range(toUInt64(10 - number)) AS arr2 FROM system.numbers LIMIT 11) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; +SELECT arr1, arr2, count() AS c FROM (SELECT arrayMap(x -> toString(x), range(number)) AS arr1, range(toUInt64(10 - number)) AS arr2 FROM system.numbers LIMIT 11) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; +SELECT arr1, arr2, count() AS c FROM (SELECT arrayMap(x -> toString(x), range(number)) AS arr1, arrayMap(x -> toString(x), range(toUInt64(10 - number))) AS arr2 FROM system.numbers LIMIT 11) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; +SELECT arr1, arr2, count() AS c FROM (SELECT arrayMap(x -> toString(x), range(number)) AS arr1, replicate(range(toUInt64(10 - number)), arr1) AS arr2 FROM system.numbers LIMIT 11) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; +SELECT arr1, arr2, count() AS c FROM (SELECT arrayMap(x -> toString(x), range(number)) AS arr1, replicate(range(toUInt64(10 - number)), arr1) AS arr2 FROM (SELECT number % 11 AS number FROM system.numbers LIMIT 30)) GROUP BY arr1, arr2 ORDER BY c DESC, arr1, arr2; From 0ced5b4013645184a395f364dbcbc1a219b1b98d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Oct 2015 09:51:22 +0300 Subject: [PATCH 20/39] dbms: improved performance of aggregation by arrays [#METR-13168]. 
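The new "serialized" aggregation method builds the GROUP BY key for a row by concatenating the serialized values of all key columns into one contiguous region (serializeKeysToPoolContiguous), uses those bytes as the hash-table key, and rolls the arena allocation back when the key already exists (onExistingKey). The self-contained sketch below shows the idea with std::string as the key buffer and an unordered_map standing in for the aggregation hash table; these types and function names are illustrative, not the Aggregator API.

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

/// Standalone illustration; not the ClickHouse API.
/// Serialize one key column's value for a row into the buffer.
/// Length-prefixing is what keeps multi-column keys from colliding the way
/// plain concatenation (the "concat" method) can.
static void serializeKey(std::string & buf, const std::string & value)
{
    size_t size = value.size();
    buf.append(reinterpret_cast<const char *>(&size), sizeof(size));
    buf.append(value);
}

int main()
{
    /// Two key "columns" (think of two array-of-string keys already rendered per row).
    std::vector<std::string> key_col_a = {"x", "x", "y", "x"};
    std::vector<std::string> key_col_b = {"1", "1", "1", "2"};

    std::unordered_map<std::string, size_t> counts;  /// serialized key -> aggregate state

    for (size_t i = 0; i < key_col_a.size(); ++i)
    {
        std::string key;
        serializeKey(key, key_col_a[i]);
        serializeKey(key, key_col_b[i]);
        ++counts[key];  /// in the real method a miss keeps the arena bytes as the stored key,
                        /// while a hit rolls the arena allocation back
    }

    std::cout << counts.size() << " distinct keys\n";  /// prints 3
}

Since the serialized bytes are written straight into the aggregation arena, the cost of a duplicate key is only the rollback of that allocation, which is why the Arena changes in this patch (growing allocContinue in a loop and keeping addChunk out of line) matter for GROUP BY over arrays.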
--- dbms/include/DB/Common/Arena.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Common/Arena.h b/dbms/include/DB/Common/Arena.h index 0d934be7272..5ee75c57193 100644 --- a/dbms/include/DB/Common/Arena.h +++ b/dbms/include/DB/Common/Arena.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -84,7 +85,7 @@ private: } /// Добавить следующий непрерывный кусок памяти размера не меньше заданного. - void addChunk(size_t min_size) + void NO_INLINE addChunk(size_t min_size) { head = new Chunk(nextSize(min_size), head); size_in_bytes += head->size(); From fefd3791f15eabfc399b93fbf2c46c78d5cadf9e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Oct 2015 03:33:43 +0300 Subject: [PATCH 21/39] Moved files [#METR-17973]. --- .../AggregateFunctionQuantileTiming.h | 2 +- .../AggregateFunctionSequenceMatch.h | 2 +- dbms/include/DB/Common/SimpleCache.h | 2 +- .../MaterializingBlockOutputStream.h | 2 +- .../SummingSortedBlockInputStream.h | 4 +- .../include/DB/Dictionaries/CacheDictionary.h | 2 +- .../Dictionaries/ClickHouseDictionarySource.h | 2 +- dbms/include/DB/Dictionaries/FlatDictionary.h | 2 +- .../DB/Dictionaries/HashedDictionary.h | 2 +- .../DB/Dictionaries/MySQLBlockInputStream.h | 2 +- .../DB/Dictionaries/MySQLDictionarySource.h | 2 +- .../DB/Dictionaries/RangeHashedDictionary.h | 2 +- dbms/include/DB/Functions/FunctionsArray.h | 2 +- dbms/include/DB/Functions/FunctionsCoding.h | 2 +- .../DB/Functions/FunctionsConversion.h | 2 +- .../DB/Functions/FunctionsDictionaries.h | 2 +- dbms/include/DB/Functions/FunctionsHashing.h | 2 +- .../DB/Functions/FunctionsMiscellaneous.h | 2 +- dbms/include/DB/Functions/FunctionsString.h | 2 +- .../DB/Functions/FunctionsStringSearch.h | 2 +- .../DB/Storages/MergeTree/MergeTreeReadPool.h | 2 +- .../MergeTree/MergeTreeWhereOptimizer.h | 4 +- dbms/src/Interpreters/Compiler.cpp | 2 +- dbms/src/Interpreters/ExpressionActions.cpp | 1 - dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- .../src/Interpreters/ExternalDictionaries.cpp | 2 +- dbms/src/Server/Server.cpp | 2 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- .../System/StorageSystemDictionaries.cpp | 2 +- .../libcommon/include/ext/function_traits.hpp | 16 ++++ libs/libcommon/include/ext/map.hpp | 86 +++++++++++++++++++ libs/libcommon/include/ext/range.hpp | 77 +++++++++++++++++ libs/libcommon/include/ext/scope_guard.hpp | 26 ++++++ libs/libcommon/include/ext/unlock_guard.hpp | 27 ++++++ 35 files changed, 263 insertions(+), 32 deletions(-) create mode 100644 libs/libcommon/include/ext/function_traits.hpp create mode 100644 libs/libcommon/include/ext/map.hpp create mode 100644 libs/libcommon/include/ext/range.hpp create mode 100644 libs/libcommon/include/ext/scope_guard.hpp create mode 100644 libs/libcommon/include/ext/unlock_guard.hpp diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h index 86a75c339e0..e61997c5d82 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h @@ -15,7 +15,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h index 87f86ded3f8..9800a1cfe3b 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h 
+++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Common/SimpleCache.h b/dbms/include/DB/Common/SimpleCache.h index 6a2bd544e1d..aadc6159a1f 100644 --- a/dbms/include/DB/Common/SimpleCache.h +++ b/dbms/include/DB/Common/SimpleCache.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include /** Простейший кэш для свободной функции. diff --git a/dbms/include/DB/DataStreams/MaterializingBlockOutputStream.h b/dbms/include/DB/DataStreams/MaterializingBlockOutputStream.h index b3f4a78ad30..02b9c7d8bab 100644 --- a/dbms/include/DB/DataStreams/MaterializingBlockOutputStream.h +++ b/dbms/include/DB/DataStreams/MaterializingBlockOutputStream.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/include/DB/DataStreams/SummingSortedBlockInputStream.h b/dbms/include/DB/DataStreams/SummingSortedBlockInputStream.h index 468555b5aed..25377433496 100644 --- a/dbms/include/DB/DataStreams/SummingSortedBlockInputStream.h +++ b/dbms/include/DB/DataStreams/SummingSortedBlockInputStream.h @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include namespace DB diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index 995f4945865..c6044ca6256 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h index 5cf7394c9e3..d7ecab46e1d 100644 --- a/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include "writeParenthesisedString.h" diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 19aced48322..0781d3ae406 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 32d4c0b4777..a94eab42784 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h b/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h index b8f4c4bb244..2cababde17b 100644 --- a/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h +++ b/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index 249c8488c98..8168be068e2 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h index 
a102760bb0c..1b52d6a3be3 100644 --- a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h +++ b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index 5b187e5259d..f3bc6268477 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -17,7 +17,7 @@ #include #include -#include +#include #include diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index 45f258dde62..4a45cc6444e 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include diff --git a/dbms/include/DB/Functions/FunctionsConversion.h b/dbms/include/DB/Functions/FunctionsConversion.h index 698e8ac24e2..6a6b3551686 100644 --- a/dbms/include/DB/Functions/FunctionsConversion.h +++ b/dbms/include/DB/Functions/FunctionsConversion.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index 4cb6b449c34..1c60af47803 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -20,7 +20,7 @@ #include #include -#include +#include namespace DB diff --git a/dbms/include/DB/Functions/FunctionsHashing.h b/dbms/include/DB/Functions/FunctionsHashing.h index b356df55eb9..2d9e077fc0c 100644 --- a/dbms/include/DB/Functions/FunctionsHashing.h +++ b/dbms/include/DB/Functions/FunctionsHashing.h @@ -22,7 +22,7 @@ #include #include -#include +#include #include diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 10c0cc262f5..a7298196996 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/include/DB/Functions/FunctionsString.h b/dbms/include/DB/Functions/FunctionsString.h index bc95afd8089..498221cfb8c 100644 --- a/dbms/include/DB/Functions/FunctionsString.h +++ b/dbms/include/DB/Functions/FunctionsString.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/dbms/include/DB/Functions/FunctionsStringSearch.h b/dbms/include/DB/Functions/FunctionsStringSearch.h index 603dae6b4b4..1215b626015 100644 --- a/dbms/include/DB/Functions/FunctionsStringSearch.h +++ b/dbms/include/DB/Functions/FunctionsStringSearch.h @@ -19,7 +19,7 @@ #include #include -#include +#include #include diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h b/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h index eac78bf6b75..3ca8fc1e100 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeWhereOptimizer.h b/dbms/include/DB/Storages/MergeTree/MergeTreeWhereOptimizer.h index 0505bb8ffc9..165ea34b61b 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -10,8 +10,8 @@ #include #include #include -#include -#include 
+#include
+#include
 #include
 #include
 #include
diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp
index 0ba17271c71..42a19112e5e 100644
--- a/dbms/src/Interpreters/Compiler.cpp
+++ b/dbms/src/Interpreters/Compiler.cpp
@@ -2,7 +2,7 @@
 #include
 #include
-#include
+#include
 #include
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp
index caa612434ce..b37fdc8bb79 100644
--- a/dbms/src/Interpreters/ExpressionActions.cpp
+++ b/dbms/src/Interpreters/ExpressionActions.cpp
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 310f56c2aef..0ded822f3b2 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -44,7 +44,7 @@
 #include
-#include
+#include
 namespace DB
diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp
index 9a97899ba34..79813706f65 100644
--- a/dbms/src/Interpreters/ExternalDictionaries.cpp
+++ b/dbms/src/Interpreters/ExternalDictionaries.cpp
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 85752ce6295..5f5716d4fd9 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -10,7 +10,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 3801c8b5ef1..6081b50cf09 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -7,7 +7,7 @@
 #include
 #include
-#include
+#include
 namespace DB
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 815a7f9d72f..82ef5a33352 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1,4 +1,4 @@
-#include
+#include
 #include
 #include
 #include
diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
index 260aef37344..634e40d0078 100644
--- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp
+++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
@@ -11,7 +11,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 namespace DB
diff --git a/libs/libcommon/include/ext/function_traits.hpp b/libs/libcommon/include/ext/function_traits.hpp
new file mode 100644
index 00000000000..31555a7f69f
--- /dev/null
+++ b/libs/libcommon/include/ext/function_traits.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <tuple>
+#include <type_traits>
+
+
+template <typename F>
+struct function_traits;
+
+template <typename ReturnType, typename... Args>
+struct function_traits<ReturnType(Args...)>
+{
+	using result = ReturnType;
+	using arguments = std::tuple<Args...>;
+	using arguments_decay = std::tuple<typename std::decay<Args>::type...>;
+};
diff --git a/libs/libcommon/include/ext/map.hpp b/libs/libcommon/include/ext/map.hpp
new file mode 100644
index 00000000000..6a7941614ec
--- /dev/null
+++ b/libs/libcommon/include/ext/map.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <iterator>
+#include <type_traits>
+
+namespace ext
+{
+	/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
+	template <typename T>
+	using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
+
+	template <typename F, typename T>
+	using apply_t = typename std::result_of<F(T)>::type;
+
+	template <typename It, typename Mapper>
+	struct map_iterator : std::iterator<
+		typename It::iterator_category,
+		std::remove_reference_t<apply_t<Mapper, typename It::reference>>,
+		std::ptrdiff_t,
+		std::add_pointer_t<std::remove_reference_t<apply_t<Mapper, typename It::reference>>>,
+		apply_t<Mapper, typename It::reference>>
+	{
+		using base_iterator = std::iterator<
+			typename It::iterator_category,
+			std::remove_reference_t<apply_t<Mapper, typename It::reference>>,
+			std::ptrdiff_t,
+			std::add_pointer_t<std::remove_reference_t<apply_t<Mapper, typename It::reference>>>,
+			apply_t<Mapper, typename It::reference>>;
+
+		It current;
+		Mapper mapper;
+
+		map_iterator(const It it, const Mapper mapper) : current{it}, mapper{mapper} {}
+
+		typename base_iterator::reference operator*() { return mapper(*current); }
+
+		map_iterator & operator++() { return ++current, *this; }
+		map_iterator & operator--() { return --current, *this; }
+
+		bool operator==(const map_iterator & other) { return current == other.current; }
+		bool operator!=(const map_iterator & other) { return current != other.current; }
+
+		typename base_iterator::difference_type operator-(const map_iterator & other) { return current - other.current; }
+	};
+
+	template <typename It, typename Mapper>
+	auto make_map_iterator(const It it, const Mapper mapper) -> ext::map_iterator<It, Mapper>
+	{
+		return { it, mapper };
+	}
+
+	/** \brief Returns collection of the same container-type as the input collection,
+	 *  with each element transformed by the application of `mapper`. */
+	template