diff --git a/CMakeLists.txt b/CMakeLists.txt index 02f25a3abcf..440f415867c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,7 @@ if(WITH_COVERAGE AND COMPILER_CLANG) endif() if(WITH_COVERAGE AND COMPILER_GCC) set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage") + set(COVERAGE_OPTION "-lgcov") endif() set (CMAKE_BUILD_COLOR_MAKEFILE ON) @@ -255,7 +256,7 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_L if (USE_LIBCXX) set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lc++ -lc++abi ${EXCEPTION_HANDLING_LIBRARY} ${BUILTINS_LIB_PATH} -Wl,-Bdynamic") else () - set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lstdc++ ${EXCEPTION_HANDLING_LIBRARY} -lgcc ${BUILTINS_LIB_PATH} -Wl,-Bdynamic") + set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lstdc++ ${EXCEPTION_HANDLING_LIBRARY} ${COVERAGE_OPTION} -lgcc ${BUILTINS_LIB_PATH} -Wl,-Bdynamic") endif () # Linking with GLIBC prevents portability of binaries to older systems. @@ -471,4 +472,5 @@ if (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING) add_default_dependencies(brotli) add_default_dependencies(libprotobuf) add_default_dependencies(base64) + add_default_dependencies(readpassphrase) endif () diff --git a/dbms/src/Common/FileChecker.cpp b/dbms/src/Common/FileChecker.cpp index d196c703e36..5256b683653 100644 --- a/dbms/src/Common/FileChecker.cpp +++ b/dbms/src/Common/FileChecker.cpp @@ -42,35 +42,40 @@ void FileChecker::update(const Files::const_iterator & begin, const Files::const save(); } -bool FileChecker::check() const +CheckResults FileChecker::check() const { /** Read the files again every time you call `check` - so as not to violate the constancy. * `check` method is rarely called. */ + + CheckResults results; Map local_map; load(local_map, files_info_path); if (local_map.empty()) - return true; + return {}; for (const auto & name_size : local_map) { - Poco::File file(Poco::Path(files_info_path).parent().toString() + "/" + name_size.first); + Poco::Path path = Poco::Path(files_info_path).parent().toString() + "/" + name_size.first; + Poco::File file(path); if (!file.exists()) { - LOG_ERROR(log, "File " << file.path() << " doesn't exist"); - return false; + results.emplace_back(path.getFileName(), false, "File " + file.path() + " doesn't exist"); + break; } + size_t real_size = file.getSize(); if (real_size != name_size.second) { - LOG_ERROR(log, "Size of " << file.path() << " is wrong. Size is " << real_size << " but should be " << name_size.second); - return false; + results.emplace_back(path.getFileName(), false, "Size of " + file.path() + " is wrong. 
Size is " + toString(real_size) + " but should be " + toString(name_size.second)); + break; } + results.emplace_back(path.getFileName(), true, ""); } - return true; + return results; } void FileChecker::initialize() diff --git a/dbms/src/Common/FileChecker.h b/dbms/src/Common/FileChecker.h index 26167826888..bd69867b11c 100644 --- a/dbms/src/Common/FileChecker.h +++ b/dbms/src/Common/FileChecker.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -24,7 +25,7 @@ public: void update(const Files::const_iterator & begin, const Files::const_iterator & end); /// Check the files whose parameters are specified in sizes.json - bool check() const; + CheckResults check() const; private: void initialize(); diff --git a/dbms/src/Functions/bitTest.cpp b/dbms/src/Functions/bitTest.cpp index 1818f7af479..ebb52a4dacf 100644 --- a/dbms/src/Functions/bitTest.cpp +++ b/dbms/src/Functions/bitTest.cpp @@ -1,5 +1,7 @@ #include #include +#include + namespace DB { @@ -10,7 +12,7 @@ struct BitTestImpl using ResultType = UInt8; template - static inline Result apply(A a, B b) + NO_SANITIZE_UNDEFINED static inline Result apply(A a, B b) { return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1; } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index a1345cd7ba9..f0afe55379f 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -144,7 +144,11 @@ void ExpressionAnalyzer::analyzeAggregation() { getRootActions(array_join_expression_list, true, temp_actions); addMultipleArrayJoinAction(temp_actions, is_array_join_left); - array_join_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); + + array_join_columns.clear(); + for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList()) + if (syntax->array_join_result_to_source.count(column.name)) + array_join_columns.emplace_back(column); } const ASTTablesInSelectQueryElement * join = select_query->join(); diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index c99c74fa33a..3edcc12fedc 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -11,6 +12,21 @@ namespace DB { +namespace +{ + +NamesAndTypes getBlockStructure() +{ + return { + {"part_path", std::make_shared()}, + {"is_passed", std::make_shared()}, + {"message", std::make_shared()}, + }; +} + +} + + InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, const Context & context_) : query_ptr(query_ptr_), context(context_) { @@ -19,18 +35,32 @@ InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, const Co BlockIO InterpreterCheckQuery::execute() { - const auto & alter = query_ptr->as(); - const String & table_name = alter.table; - String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database; + const auto & check = query_ptr->as(); + const String & table_name = check.table; + String database_name = check.database.empty() ? 
context.getCurrentDatabase() : check.database; StoragePtr table = context.getTable(database_name, table_name); + auto check_results = table->checkData(query_ptr, context); - auto column = ColumnUInt8::create(); - column->insertValue(UInt64(table->checkData())); - result = Block{{ std::move(column), std::make_shared(), "result" }}; + auto block_structure = getBlockStructure(); + auto path_column = block_structure[0].type->createColumn(); + auto is_passed_column = block_structure[1].type->createColumn(); + auto message_column = block_structure[2].type->createColumn(); + + for (const auto & check_result : check_results) + { + path_column->insert(check_result.fs_path); + is_passed_column->insert(static_cast(check_result.success)); + message_column->insert(check_result.failure_message); + } + + Block block({ + {std::move(path_column), block_structure[0].type, block_structure[0].name}, + {std::move(is_passed_column), block_structure[1].type, block_structure[1].name}, + {std::move(message_column), block_structure[2].type, block_structure[2].name}}); BlockIO res; - res.in = std::make_shared(result); + res.in = std::make_shared(block); return res; } diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index 595b6c2ecb6..40665f6f2b6 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -1,12 +1,17 @@ #pragma once #include +#include + namespace DB { struct ASTCheckQuery : public ASTQueryWithTableAndOutput { + + ASTPtr partition; + /** Get the text that identifies this element. */ String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; } @@ -19,7 +24,7 @@ struct ASTCheckQuery : public ASTQueryWithTableAndOutput } protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked frame) const override + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { std::string nl_or_nothing = settings.one_line ? "" : "\n"; @@ -37,6 +42,12 @@ protected: } settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << backQuoteIfNeed(table) << (settings.hilite ? hilite_none : ""); } + + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? 
hilite_none : ""); + partition->formatImpl(settings, state, frame); + } } }; diff --git a/dbms/src/Parsers/ParserCheckQuery.cpp b/dbms/src/Parsers/ParserCheckQuery.cpp index cd25e60b887..5ba8119571d 100644 --- a/dbms/src/Parsers/ParserCheckQuery.cpp +++ b/dbms/src/Parsers/ParserCheckQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,9 +12,11 @@ namespace DB bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_check_table("CHECK TABLE"); + ParserKeyword s_partition("PARTITION"); ParserToken s_dot(TokenType::Dot); ParserIdentifier table_parser; + ParserPartition partition_parser; ASTPtr table; ASTPtr database; @@ -23,24 +26,28 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!table_parser.parse(pos, database, expected)) return false; + auto query = std::make_shared(); if (s_dot.ignore(pos)) { if (!table_parser.parse(pos, table, expected)) return false; - auto query = std::make_shared(); getIdentifierName(database, query->database); getIdentifierName(table, query->table); - node = query; } else { table = database; - auto query = std::make_shared(); getIdentifierName(table, query->table); - node = query; } + if (s_partition.ignore(pos, expected)) + { + if (!partition_parser.parse(pos, query->partition, expected)) + return false; + } + + node = query; return true; } diff --git a/dbms/src/Storages/CheckResults.h b/dbms/src/Storages/CheckResults.h new file mode 100644 index 00000000000..0f895fba3bc --- /dev/null +++ b/dbms/src/Storages/CheckResults.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Result of a CHECK TABLE query for a single part of a table +struct CheckResult +{ + /// Part name for MergeTree tables or file name for simpler tables + String fs_path; + /// Whether the check passed + bool success = false; + /// Failure message, if any + String failure_message; + + CheckResult() = default; + CheckResult(const String & fs_path_, bool success_, String failure_message_) + : fs_path(fs_path_), success(success_), failure_message(failure_message_) + {} +}; + +using CheckResults = std::vector; + +} diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 5bfd8224372..da32b7aa74a 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -285,7 +286,7 @@ public: virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; } /// Checks validity of the data - virtual bool checkData() const { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } + virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information.
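
The CheckResults type introduced above is the contract between the storage engines and InterpreterCheckQuery: each checked unit (a data part for MergeTree engines, a data file for the Log family) becomes one row of CHECK TABLE output with the columns part_path, is_passed and message. The following standalone sketch mirrors that mapping using only the standard library; the struct and the sample part names here are illustrative stand-ins, not the actual ClickHouse classes, and only show how a storage's checkData() results end up as result rows.

#include <iostream>
#include <string>
#include <vector>

// Standalone mirror of the CheckResult struct from dbms/src/Storages/CheckResults.h.
struct CheckResult
{
    std::string fs_path;         // part name (MergeTree) or file name (Log family)
    bool success = false;        // whether the check passed
    std::string failure_message; // non-empty only for failed checks
};

using CheckResults = std::vector<CheckResult>;

int main()
{
    // Roughly what a storage's checkData() could return for two parts (sample values).
    CheckResults results = {
        {"201901_1_1_0", true, ""},
        {"201902_3_3_0", false, "Cannot read all data. Bytes read: 2. Bytes expected: 16."},
    };

    // InterpreterCheckQuery turns each result into one (part_path, is_passed, message) row.
    for (const auto & r : results)
        std::cout << r.fs_path << '\t' << r.success << '\t' << r.failure_message << '\n';
    return 0;
}
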
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index b06dea30052..045aa8a6461 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -181,7 +181,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par } -void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) +CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) { LOG_WARNING(log, "Checking part " << part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); @@ -197,6 +197,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) if (!part) { searchForMissingPart(part_name); + return {part_name, false, "Part is missing, will search for it"}; } /// We have this part, and it's active. We will check whether we need this part and whether it has the right data. else if (part->name == part_name) @@ -242,7 +243,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) if (need_stop) { LOG_INFO(log, "Checking part was cancelled."); - return; + return {part_name, false, "Checking part was cancelled"}; } LOG_INFO(log, "Part " << part_name << " looks good."); @@ -253,13 +254,15 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) tryLogCurrentException(log, __PRETTY_FUNCTION__); - LOG_ERROR(log, "Part " << part_name << " looks broken. Removing it and queueing a fetch."); + String message = "Part " + part_name + " looks broken. Removing it and queueing a fetch."; + LOG_ERROR(log, message); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); storage.removePartAndEnqueueFetch(part_name); /// Delete part locally. storage.forgetPartAndMoveToDetached(part, "broken"); + return {part_name, false, message}; } } else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) @@ -269,8 +272,10 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) /// Therefore, delete only if the part is old (not very reliable). ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - LOG_ERROR(log, "Unexpected part " << part_name << " in filesystem. Removing."); + String message = "Unexpected part " + part_name + " in filesystem. Removing."; + LOG_ERROR(log, message); storage.forgetPartAndMoveToDetached(part, "unexpected"); + return {part_name, false, message}; } else { @@ -290,6 +295,8 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) /// In the worst case, errors will still appear `old_parts_lifetime` seconds in error log until the part is removed as the old one. LOG_WARNING(log, "We have part " << part->name << " covering part " << part_name); } + + return {part_name, true, ""}; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 432fb0f4bb6..322ee593c46 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -66,12 +67,12 @@ public: /// Get the number of parts in the queue for check. 
size_t size() const; - + /// Check part by name + CheckResult checkPart(const String & part_name); private: void run(); - void checkPart(const String & part_name); void searchForMissingPart(const String & part_name); StorageReplicatedMergeTree & storage; diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 2b17a675144..af34cb1ea1e 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -53,6 +53,20 @@ public: private: ReadBufferFromFile mrk_file_buf; + + std::pair readMarkFromFile() + { + size_t mrk_rows; + MarkInCompressedFile mrk_mark; + readIntBinary(mrk_mark.offset_in_compressed_file, mrk_hashing_buf); + readIntBinary(mrk_mark.offset_in_decompressed_block, mrk_hashing_buf); + if (mrk_file_extension == ".mrk2") + readIntBinary(mrk_rows, mrk_hashing_buf); + else + mrk_rows = index_granularity.getMarkRows(mark_position); + + return {mrk_mark, mrk_rows}; + } public: HashingReadBuffer mrk_hashing_buf; @@ -78,15 +92,8 @@ public: void assertMark(bool only_read=false) { - MarkInCompressedFile mrk_mark; - readIntBinary(mrk_mark.offset_in_compressed_file, mrk_hashing_buf); - readIntBinary(mrk_mark.offset_in_decompressed_block, mrk_hashing_buf); - size_t mrk_rows; - if (mrk_file_extension == ".mrk2") - readIntBinary(mrk_rows, mrk_hashing_buf); - else - mrk_rows = index_granularity.getMarkRows(mark_position); + auto [mrk_mark, mrk_rows] = readMarkFromFile(); bool has_alternative_mark = false; MarkInCompressedFile alternative_data_mark = {}; MarkInCompressedFile data_mark = {}; @@ -136,6 +143,12 @@ public: + toString(compressed_hashing_buf.count()) + " (compressed), " + toString(uncompressed_hashing_buf.count()) + " (uncompressed)", ErrorCodes::CORRUPTED_DATA); + if (index_granularity.hasFinalMark()) + { + auto [final_mark, final_mark_rows] = readMarkFromFile(); + if (final_mark_rows != 0) + throw Exception("Incorrect final mark at the end of " + mrk_file_path + " expected 0 rows, got " + toString(final_mark_rows), ErrorCodes::CORRUPTED_DATA); + } if (!mrk_hashing_buf.eof()) throw Exception("EOF expected in " + mrk_file_path + " file" + " at position " diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index afef7af57e9..d297353135d 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -625,7 +625,7 @@ BlockOutputStreamPtr StorageLog::write( return std::make_shared(*this); } -bool StorageLog::checkData() const +CheckResults StorageLog::checkData(const ASTPtr & /* query */, const Context & /* context */) { std::shared_lock lock(rwlock); return file_checker.check(); diff --git a/dbms/src/Storages/StorageLog.h b/dbms/src/Storages/StorageLog.h index cf0d07a3bfe..7d58f4e4a5d 100644 --- a/dbms/src/Storages/StorageLog.h +++ b/dbms/src/Storages/StorageLog.h @@ -38,7 +38,7 @@ public: void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - bool checkData() const override; + CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; void truncate(const ASTPtr &, const Context &) override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index df936c167d3..6b32a99c167 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -7,8 +7,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -17,6 +19,7 @@ 
#include #include #include +#include #include #include #include @@ -1121,4 +1124,59 @@ ActionLock StorageMergeTree::getActionLock(StorageActionBlockType action_type) return {}; } +CheckResults StorageMergeTree::checkData(const ASTPtr & query, const Context & context) +{ + CheckResults results; + DataPartsVector data_parts; + if (const auto & check_query = query->as(); check_query.partition) + { + String partition_id = getPartitionIDFromQuery(check_query.partition, context); + data_parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + } + else + data_parts = getDataPartsVector(); + + for (auto & part : data_parts) + { + String full_part_path = part->getFullPath(); + /// If the checksums file is not present, calculate the checksums and write them to disk. + String checksums_path = full_part_path + "checksums.txt"; + String tmp_checksums_path = full_part_path + "checksums.txt.tmp"; + if (!Poco::File(checksums_path).exists()) + { + try + { + auto calculated_checksums = checkDataPart(part, false, primary_key_data_types, skip_indices); + calculated_checksums.checkEqual(part->checksums, true); + WriteBufferFromFile out(tmp_checksums_path, 4096); + part->checksums.write(out); + Poco::File(tmp_checksums_path).renameTo(checksums_path); + results.emplace_back(part->name, true, "Checksums recounted and written to disk."); + } + catch (const Exception & ex) + { + Poco::File tmp_file(tmp_checksums_path); + if (tmp_file.exists()) + tmp_file.remove(); + + results.emplace_back(part->name, false, + "Check of part finished with error: '" + ex.message() + "'"); + } + } + else + { + try + { + checkDataPart(part, true, primary_key_data_types, skip_indices); + results.emplace_back(part->name, true, ""); + } + catch (const Exception & ex) + { + results.emplace_back(part->name, false, ex.message()); + } + } + } + return results; +} + } diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index b5156ce7137..0de9618d915 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -70,6 +70,8 @@ public: String getDataPath() const override { return full_path; } + CheckResults checkData(const ASTPtr & query, const Context & context) override; + private: String path; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b02f22b987e..b51da168192 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -2380,7 +2381,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n auto results = zookeeper->multi(ops); - String path_created = dynamic_cast(*results[0]).path_created; + String path_created = dynamic_cast(*results.back()).path_created; log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); queue.insert(zookeeper, log_entry); } @@ -5107,4 +5108,31 @@ bool StorageReplicatedMergeTree::dropPartsInPartition( return true; } + +CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, const Context & context) +{ + CheckResults results; + DataPartsVector data_parts; + if (const auto & check_query = query->as(); check_query.partition) + { + String partition_id = getPartitionIDFromQuery(check_query.partition, context); + data_parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + } + else + data_parts = 
getDataPartsVector(); + + for (auto & part : data_parts) + { + try + { + results.push_back(part_check_thread.checkPart(part->name)); + } + catch (const Exception & ex) + { + results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'"); + } + } + return results; +} + } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index eba0511e15e..59afb96a523 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -170,6 +170,8 @@ public: String getDataPath() const override { return full_path; } + CheckResults checkData(const ASTPtr & query, const Context & context) override; + private: /// Delete old parts from disk and from ZooKeeper. void clearOldPartsAndRemoveFromZK(); diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index dba2e64a88f..28f8616bbaa 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -282,7 +282,7 @@ BlockOutputStreamPtr StorageStripeLog::write( } -bool StorageStripeLog::checkData() const +CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, const Context & /* context */) { std::shared_lock lock(rwlock); return file_checker.check(); diff --git a/dbms/src/Storages/StorageStripeLog.h b/dbms/src/Storages/StorageStripeLog.h index 6489c82873e..31b681e5800 100644 --- a/dbms/src/Storages/StorageStripeLog.h +++ b/dbms/src/Storages/StorageStripeLog.h @@ -40,7 +40,7 @@ public: void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - bool checkData() const override; + CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; /// Data of the file. 
struct ColumnData diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 4690ab925e8..eacca1c6413 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -32,6 +32,7 @@ #include #include +#include #include @@ -404,7 +405,7 @@ BlockOutputStreamPtr StorageTinyLog::write( } -bool StorageTinyLog::checkData() const +CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context & /* context */) { return file_checker.check(); } diff --git a/dbms/src/Storages/StorageTinyLog.h b/dbms/src/Storages/StorageTinyLog.h index 5b8e4bc90ac..111df9baf90 100644 --- a/dbms/src/Storages/StorageTinyLog.h +++ b/dbms/src/Storages/StorageTinyLog.h @@ -39,7 +39,7 @@ public: void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - bool checkData() const override; + CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; /// Column data struct ColumnData diff --git a/dbms/tests/integration/test_check_table/__init__.py b/dbms/tests/integration/test_check_table/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_check_table/test.py b/dbms/tests/integration/test_check_table/test.py new file mode 100644 index 00000000000..6d53b423896 --- /dev/null +++ b/dbms/tests/integration/test_check_table/test.py @@ -0,0 +1,129 @@ +import time + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in [node1, node2]: + node.query(''' + CREATE TABLE replicated_mt(date Date, id UInt32, value Int32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id; + '''.format(replica=node.name)) + + node1.query(''' + CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32) + ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id; + ''') + + yield cluster + + finally: + cluster.shutdown() + + +def corrupt_data_part_on_disk(node, table, part_name): + part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip() + node.exec_in_container(['bash', '-c', 'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(p=part_path)], privileged=True) + +def remove_checksums_on_disk(node, table, part_name): + part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip() + node.exec_in_container(['bash', '-c', 'rm -r {p}/checksums.txt'.format(p=part_path)], privileged=True) + +def remove_part_from_disk(node, table, part_name): + part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip() + if not part_path: + raise Exception("Part " + part_name + "doesn't exist") + node.exec_in_container(['bash', '-c', 'rm -r {p}/*'.format(p=part_path)], privileged=True) + + +def test_check_normal_table_corruption(started_cluster): + node1.query("INSERT INTO non_replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)") + assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201902") == 
"201902_1_1_0\t1\t\n" + + remove_checksums_on_disk(node1, "non_replicated_mt", "201902_1_1_0") + + assert node1.query("CHECK TABLE non_replicated_mt").strip() == "201902_1_1_0\t1\tChecksums recounted and written to disk." + + assert node1.query("SELECT COUNT() FROM non_replicated_mt") == "2\n" + + remove_checksums_on_disk(node1, "non_replicated_mt", "201902_1_1_0") + + assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201902").strip() == "201902_1_1_0\t1\tChecksums recounted and written to disk." + + assert node1.query("SELECT COUNT() FROM non_replicated_mt") == "2\n" + + corrupt_data_part_on_disk(node1, "non_replicated_mt", "201902_1_1_0") + + assert node1.query("CHECK TABLE non_replicated_mt").strip() == "201902_1_1_0\t0\tCannot read all data. Bytes read: 2. Bytes expected: 16." + + assert node1.query("CHECK TABLE non_replicated_mt").strip() == "201902_1_1_0\t0\tCannot read all data. Bytes read: 2. Bytes expected: 16." + + node1.query("INSERT INTO non_replicated_mt VALUES (toDate('2019-01-01'), 1, 10), (toDate('2019-01-01'), 2, 12)") + + assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201901") == "201901_2_2_0\t1\t\n" + + corrupt_data_part_on_disk(node1, "non_replicated_mt", "201901_2_2_0") + + remove_checksums_on_disk(node1, "non_replicated_mt", "201901_2_2_0") + + assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201901") == "201901_2_2_0\t0\tCheck of part finished with error: \\'Cannot read all data. Bytes read: 2. Bytes expected: 16.\\'\n" + + +def test_check_replicated_table_simple(started_cluster): + node1.query("TRUNCATE TABLE replicated_mt") + node2.query("SYSTEM SYNC REPLICA replicated_mt") + node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)") + node2.query("SYSTEM SYNC REPLICA replicated_mt") + + assert node1.query("SELECT count() from replicated_mt") == "2\n" + assert node2.query("SELECT count() from replicated_mt") == "2\n" + + assert node1.query("CHECK TABLE replicated_mt") == "201902_0_0_0\t1\t\n" + assert node2.query("CHECK TABLE replicated_mt") == "201902_0_0_0\t1\t\n" + + node2.query("INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)") + node1.query("SYSTEM SYNC REPLICA replicated_mt") + assert node1.query("SELECT count() from replicated_mt") == "4\n" + assert node2.query("SELECT count() from replicated_mt") == "4\n" + + assert node1.query("CHECK TABLE replicated_mt PARTITION 201901") == "201901_0_0_0\t1\t\n" + assert node2.query("CHECK TABLE replicated_mt PARTITION 201901") == "201901_0_0_0\t1\t\n" + + +def test_check_replicated_table_corruption(started_cluster): + node1.query("TRUNCATE TABLE replicated_mt") + node2.query("SYSTEM SYNC REPLICA replicated_mt") + node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)") + node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)") + node2.query("SYSTEM SYNC REPLICA replicated_mt") + + assert node1.query("SELECT count() from replicated_mt") == "4\n" + assert node2.query("SELECT count() from replicated_mt") == "4\n" + + part_name = node1.query("SELECT name from system.parts where table = 'replicated_mt' and partition_id = '201901' and active = 1").strip() + + corrupt_data_part_on_disk(node1, "replicated_mt", part_name) + assert node1.query("CHECK TABLE replicated_mt PARTITION 201901") == "{p}\t0\tPart {p} looks broken. 
Removing it and queueing a fetch.\n".format(p=part_name) + + node1.query("SYSTEM SYNC REPLICA replicated_mt") + assert node1.query("CHECK TABLE replicated_mt PARTITION 201901") == "{}\t1\t\n".format(part_name) + assert node1.query("SELECT count() from replicated_mt") == "4\n" + + remove_part_from_disk(node2, "replicated_mt", part_name) + assert node2.query("CHECK TABLE replicated_mt PARTITION 201901") == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(p=part_name) + + node1.query("SYSTEM SYNC REPLICA replicated_mt") + assert node1.query("CHECK TABLE replicated_mt PARTITION 201901") == "{}\t1\t\n".format(part_name) + assert node1.query("SELECT count() from replicated_mt") == "4\n" diff --git a/dbms/tests/queries/0_stateless/00063_check_query.reference b/dbms/tests/queries/0_stateless/00063_check_query.reference index 6ed281c757a..9b20cc02e31 100644 --- a/dbms/tests/queries/0_stateless/00063_check_query.reference +++ b/dbms/tests/queries/0_stateless/00063_check_query.reference @@ -1,2 +1,5 @@ -1 -1 +N.bin 1 +S.bin 1 +N.bin 1 +S.bin 1 +__marks.mrk 1 diff --git a/dbms/tests/queries/0_stateless/00876_wrong_arraj_join_column.reference b/dbms/tests/queries/0_stateless/00876_wrong_arraj_join_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00876_wrong_arraj_join_column.sql b/dbms/tests/queries/0_stateless/00876_wrong_arraj_join_column.sql new file mode 100644 index 00000000000..0e72f9a67ce --- /dev/null +++ b/dbms/tests/queries/0_stateless/00876_wrong_arraj_join_column.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS visits; +CREATE TABLE visits (str String) ENGINE = MergeTree ORDER BY (str); + +SELECT 1 +FROM visits +ARRAY JOIN arrayFilter(t -> 1, arrayMap(x -> tuple(x), [42])) AS i +WHERE ((str, i.1) IN ('x', 0)); + +DROP TABLE visits; diff --git a/dbms/tests/queries/0_stateless/00961_check_table.reference b/dbms/tests/queries/0_stateless/00961_check_table.reference new file mode 100644 index 00000000000..d85c66db622 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00961_check_table.reference @@ -0,0 +1,11 @@ +201901_1_1_0 1 +======== +201901_1_1_0 1 +201901_2_2_0 1 +======== +201901_1_2_1 1 +======== +201901_1_2_1 1 +201902_3_3_0 1 +======== +201902_3_4_1 1 diff --git a/dbms/tests/queries/0_stateless/00961_check_table.sql b/dbms/tests/queries/0_stateless/00961_check_table.sql new file mode 100644 index 00000000000..9752ffc3974 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00961_check_table.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS mt_table; + +CREATE TABLE mt_table (d Date, key UInt64, data String) ENGINE = MergeTree() PARTITION BY toYYYYMM(d) ORDER BY key; + +CHECK TABLE mt_table; + +INSERT INTO mt_table VALUES (toDate('2019-01-02'), 1, 'Hello'), (toDate('2019-01-02'), 2, 'World'); + +CHECK TABLE mt_table; + +INSERT INTO mt_table VALUES (toDate('2019-01-02'), 3, 'quick'), (toDate('2019-01-02'), 4, 'brown'); + +SELECT '========'; + +CHECK TABLE mt_table; + +OPTIMIZE TABLE mt_table FINAL; + +SELECT '========'; + +CHECK TABLE mt_table; + +SELECT '========'; + +INSERT INTO mt_table VALUES (toDate('2019-02-03'), 5, '!'), (toDate('2019-02-03'), 6, '?'); + +CHECK TABLE mt_table; + +SELECT '========'; + +INSERT INTO mt_table VALUES (toDate('2019-02-03'), 7, 'jump'), (toDate('2019-02-03'), 8, 'around'); + +OPTIMIZE TABLE mt_table FINAL; + +CHECK TABLE mt_table PARTITION 201902; + +DROP TABLE IF EXISTS mt_table; diff --git a/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.reference 
b/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.sql b/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.sql new file mode 100644 index 00000000000..1682e725670 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00967_ubsan_bit_test.sql @@ -0,0 +1 @@ +SELECT sum(ignore(bitTest(number, 65))) FROM numbers(10); diff --git a/dbms/tests/queries/1_stateful/00077_log_tinylog_stripelog.reference b/dbms/tests/queries/1_stateful/00077_log_tinylog_stripelog.reference index 98f50687de4..d8ccbd66592 100644 --- a/dbms/tests/queries/1_stateful/00077_log_tinylog_stripelog.reference +++ b/dbms/tests/queries/1_stateful/00077_log_tinylog_stripelog.reference @@ -1,10 +1,17 @@ -1 -1 -1 8873898 12457120258355519194 8873898 12457120258355519194 8873898 12457120258355519194 8873898 12457120258355519194 -1 -1 -1 +AdvEngineID.bin 1 +CounterID.bin 1 +RegionID.bin 1 +SearchPhrase.bin 1 +UserID.bin 1 +__marks.mrk 1 +AdvEngineID.bin 1 +CounterID.bin 1 +RegionID.bin 1 +SearchPhrase.bin 1 +UserID.bin 1 +data.bin 1 +index.mrk 1 diff --git a/docker/images.json b/docker/images.json index 6ab12504a7d..e2282fcb653 100644 --- a/docker/images.json +++ b/docker/images.json @@ -7,7 +7,9 @@ "docker/test/performance": "yandex/clickhouse-performance-test", "docker/test/pvs": "yandex/clickhouse-pvs-test", "docker/test/stateful": "yandex/clickhouse-stateful-test", + "docker/test/stateful_with_coverage": "yandex/clickhouse-stateful-with-coverage-test", "docker/test/stateless": "yandex/clickhouse-stateless-test", + "docker/test/stateless_with_coverage": "yandex/clickhouse-stateless-with-coverage-test", "docker/test/unit": "yandex/clickhouse-unit-test", "docker/test/stress": "yandex/clickhouse-stress-test", "dbms/tests/integration/image": "yandex/clickhouse-integration-tests-runner" diff --git a/docker/packager/packager b/docker/packager/packager index 9a1d033223b..0e8bf6ea98d 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -103,7 +103,7 @@ def run_vagrant_box_with_env(image_path, output_dir, ch_root): logging.info("Copying binary back") vagrant.copy_from_image("~/ClickHouse/dbms/programs/clickhouse", output_dir) -def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, distcc_hosts, unbundled, split_binary, version, author, official, alien_pkgs): +def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, distcc_hosts, unbundled, split_binary, version, author, official, alien_pkgs, with_coverage): result = [] cmake_flags = ['$CMAKE_FLAGS'] @@ -148,6 +148,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, di if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 -DGLIBC_COMPATIBILITY=ON') + if with_coverage: + cmake_flags.append('-DWITH_COVERAGE=1') + if version: result.append("VERSION_STRING='{}'".format(version)) @@ -180,6 +183,7 @@ if __name__ == "__main__": parser.add_argument("--author", default="clickhouse") parser.add_argument("--official", action="store_true") parser.add_argument("--alien-pkgs", nargs='+', default=[]) + parser.add_argument("--with-coverage", action="store_true") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -202,7 +206,7 @@ if __name__ == "__main__": env_prepared = parse_env_variables( args.build_type, 
args.compiler, args.sanitizer, args.package_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, - args.version, args.author, args.official, args.alien_pkgs) + args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) if args.package_type != "freebsd": run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) else: diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile new file mode 100644 index 00000000000..c0c31a42571 --- /dev/null +++ b/docker/test/coverage/Dockerfile @@ -0,0 +1,36 @@ +# docker build -t yandex/clickhouse-coverage . +FROM yandex/clickhouse-deb-builder + +RUN apt-get --allow-unauthenticated update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get --allow-unauthenticated install --yes --no-install-recommends \ + bash \ + fakeroot \ + cmake \ + ccache \ + curl \ + software-properties-common + + +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list + +RUN apt-get --allow-unauthenticated update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get --allow-unauthenticated install --yes --no-install-recommends \ + perl \ + lcov \ + llvm-9 \ + tzdata + + +ENV COVERAGE_DIR=/coverage_reports +ENV SOURCE_DIR=/build +ENV OUTPUT_DIR=/output +ENV IGNORE='.*contrib.*' + + +CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-7 CXX=clang++-7 cmake .. && cd /; \ + dpkg -i /package_folder/clickhouse-common-static_*.deb; \ + llvm-profdata-9 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ + llvm-cov-9 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ + genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR} diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 65943bf6955..d7707f1c3e0 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -16,6 +16,14 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ dpkg -i package_folder/clickhouse-client_*.deb; \ dpkg -i package_folder/clickhouse-test_*.deb; \ + ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \ ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \ echo "TSAN_OPTIONS='halt_on_error=1 history_size=7'" >> /etc/environment; \ echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-8/bin/llvm-symbolizer" >> /etc/environment; \ diff --git a/docker/test/stateful_with_coverage/Dockerfile b/docker/test/stateful_with_coverage/Dockerfile new file mode 100644 index 00000000000..2a566bdcf01 --- /dev/null +++ b/docker/test/stateful_with_coverage/Dockerfile @@ -0,0 +1,19 @@ +# docker build -t yandex/clickhouse-stateful-test . 
+FROM yandex/clickhouse-stateless-test + +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list + +RUN apt-get update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + python-requests \ + llvm-8 \ + llvm-9 + +COPY s3downloader /s3downloader +COPY run.sh /run.sh + +ENV DATASETS="hits visits" + +CMD ["/bin/bash", "/run.sh"] + diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh new file mode 100755 index 00000000000..6ec0bfa155a --- /dev/null +++ b/docker/test/stateful_with_coverage/run.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +kill_clickhouse () { + while kill -0 `pgrep -u clickhouse`; + do + kill `pgrep -u clickhouse` 2>/dev/null + echo "Process" `pgrep -u clickhouse` "still alive" + sleep 10 + done +} + +start_clickhouse () { + LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & +} + +wait_llvm_profdata () { + while kill -0 `pgrep llvm-profdata-9`; + do + echo "Waiting for profdata " `pgrep llvm-profdata-9` "still alive" + sleep 3 + done +} + +merge_client_files_in_background () { + client_files=`ls /client_*profraw 2>/dev/null` + if [ ! -z "$client_files" ] + then + llvm-profdata-9 merge -sparse $client_files -o merged_client_`date +%s`.profraw + rm $client_files + fi +} + +chmod 777 / + +dpkg -i package_folder/clickhouse-common-static_*.deb; \ + dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ + dpkg -i package_folder/clickhouse-server_*.deb; \ + dpkg -i package_folder/clickhouse-client_*.deb; \ + dpkg -i package_folder/clickhouse-test_*.deb + +mkdir -p /var/lib/clickhouse +mkdir -p /var/log/clickhouse-server +chmod 777 -R /var/log/clickhouse-server/ + +ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer + + +service zookeeper start + +sleep 5 + +start_clickhouse + +sleep 5 + +/s3downloader --dataset-names $DATASETS + +chmod 777 -R /var/lib/clickhouse + +while /bin/true; do + merge_client_files_in_background + sleep 2 +done & + +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE datasets" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test" + +kill_clickhouse +start_clickhouse + +sleep 10 + +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client 
--query "RENAME TABLE datasets.visits_v1 TO test.visits" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --shard --zookeeper --no-stateless $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + +kill_clickhouse + +wait_llvm_profdata + +sleep 3 + +wait_llvm_profdata # 100% merged all parts + + +cp /*.profraw /profraw ||: diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader new file mode 100755 index 00000000000..f8e2bf3cbe4 --- /dev/null +++ b/docker/test/stateful_with_coverage/s3downloader @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import sys +import tarfile +import logging +import argparse +import requests +import tempfile + + +DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net' + +AVAILABLE_DATASETS = { + 'hits': 'hits_v1.tar', + 'visits': 'visits_v1.tar', +} + +def _get_temp_file_name(): + return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + +def build_url(base_url, dataset): + return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset]) + +def dowload_with_progress(url, path): + logging.info("Downloading from %s to temp path %s", url, path) + with open(path, 'w') as f: + response = requests.get(url, stream=True) + response.raise_for_status() + total_length = response.headers.get('content-length') + if total_length is None or int(total_length) == 0: + logging.info("No content-length, will download file without progress") + f.write(response.content) + else: + dl = 0 + total_length = int(total_length) + logging.info("Content length is %ld bytes", total_length) + for data in response.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if sys.stdout.isatty(): + done = int(50 * dl / total_length) + percent = int(100 * float(dl) / total_length) + sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) + sys.stdout.flush() + sys.stdout.write("\n") + logging.info("Downloading finished") + +def unpack_to_clickhouse_directory(tar_path, clickhouse_path): + logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path) + with tarfile.open(tar_path, 'r') as comp_file: + comp_file.extractall(path=clickhouse_path) + logging.info("Unpack finished") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + parser = argparse.ArgumentParser( + description="Simple tool for dowloading datasets for clickhouse from S3") + + parser.add_argument('--dataset-names', required=True, nargs='+', choices=AVAILABLE_DATASETS.keys()) + parser.add_argument('--url-prefix', default=DEFAULT_URL) + parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/') + + args = parser.parse_args() + datasets = args.dataset_names + logging.info("Will fetch following datasets: %s", ', '.join(datasets)) + for dataset in datasets: + logging.info("Processing %s", dataset) + temp_archive_path = _get_temp_file_name() + try: + download_url_for_dataset = build_url(args.url_prefix, dataset) + dowload_with_progress(download_url_for_dataset, temp_archive_path) + unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path) + except Exception as ex: + logging.info("Some exception occured %s", str(ex)) + raise + finally: + logging.info("Will remove dowloaded file %s from filesystem if it exists", temp_archive_path) + if 
os.path.exists(temp_archive_path): + os.remove(temp_archive_path) + logging.info("Processing of %s finished", dataset) + logging.info("Fetch finished, enjoy your tables!") + + diff --git a/docker/test/stateless_with_coverage/Dockerfile b/docker/test/stateless_with_coverage/Dockerfile new file mode 100644 index 00000000000..b9da18223ab --- /dev/null +++ b/docker/test/stateless_with_coverage/Dockerfile @@ -0,0 +1,36 @@ +# docker build -t yandex/clickhouse-stateless-with-coverage-test . +FROM yandex/clickhouse-deb-builder + +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list + +RUN apt-get update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + bash \ + tzdata \ + fakeroot \ + debhelper \ + zookeeper \ + zookeeperd \ + expect \ + python \ + python-lxml \ + python-termcolor \ + python-requests \ + curl \ + sudo \ + openssl \ + netcat-openbsd \ + telnet \ + moreutils \ + brotli \ + gdb \ + lsof \ + llvm-9 + + +ENV TZ=Europe/Moscow +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +COPY run.sh /run.sh + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh new file mode 100755 index 00000000000..50082337757 --- /dev/null +++ b/docker/test/stateless_with_coverage/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +kill_clickhouse () { + while kill -0 `pgrep -u clickhouse`; + do + kill `pgrep -u clickhouse` 2>/dev/null + echo "Process" `pgrep -u clickhouse` "still alive" + sleep 10 + done +} + +wait_llvm_profdata () { + while kill -0 `pgrep llvm-profdata-9`; + do + echo "Waiting for profdata " `pgrep llvm-profdata-9` "still alive" + sleep 3 + done +} + +start_clickhouse () { + LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & +} + +merge_client_files_in_background () { + client_files=`ls /client_*profraw 2>/dev/null` + if [ ! 
-z "$client_files" ] + then + llvm-profdata-9 merge -sparse $client_files -o merged_client_`date +%s`.profraw + rm $client_files + fi +} + +chmod 777 / + +dpkg -i package_folder/clickhouse-common-static_*.deb; \ + dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ + dpkg -i package_folder/clickhouse-server_*.deb; \ + dpkg -i package_folder/clickhouse-client_*.deb; \ + dpkg -i package_folder/clickhouse-test_*.deb + + +mkdir -p /var/lib/clickhouse +mkdir -p /var/log/clickhouse-server +chmod 777 -R /var/lib/clickhouse +chmod 777 -R /var/log/clickhouse-server/ + +ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ + ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \ + ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer + +service zookeeper start +sleep 5 + +start_clickhouse + +sleep 10 + +while /bin/true; do + merge_client_files_in_background + sleep 2 +done & + +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --shard --zookeeper $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + +kill_clickhouse + +wait_llvm_profdata + +sleep 3 + +wait_llvm_profdata # 100% merged all parts + +cp /*.profraw /profraw ||: diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 0ee49d64d51..b0b94ccc579 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -34,13 +34,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \ - echo "TSAN_OPTIONS='halt_on_error=1 history_size=7'" >> /etc/environment; \ - echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-8/bin/llvm-symbolizer" >> /etc/environment; \ + echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \ echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \ - echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ - echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ - echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ - echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ service clickhouse-server start && sleep 5 \ && /s3downloader --dataset-names $DATASETS \ && chmod 777 -R /var/lib/clickhouse \