From 58a78fba12da9e298611a2ecdd1558eff31bfe53 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Mar 2022 12:58:22 +0800 Subject: [PATCH 01/12] formatString bug fix and refactoring --- src/Common/format.h | 176 +++++++++++++++++ src/Functions/concat.cpp | 24 ++- src/Functions/formatString.cpp | 24 ++- src/Functions/formatString.h | 180 +----------------- ...245_format_string_stack_overflow.reference | 1 + .../02245_format_string_stack_overflow.sql | 1 + 6 files changed, 209 insertions(+), 197 deletions(-) create mode 100644 src/Common/format.h create mode 100644 tests/queries/0_stateless/02245_format_string_stack_overflow.reference create mode 100644 tests/queries/0_stateless/02245_format_string_stack_overflow.sql diff --git a/src/Common/format.h b/src/Common/format.h new file mode 100644 index 00000000000..234ff90061c --- /dev/null +++ b/src/Common/format.h @@ -0,0 +1,176 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace Format +{ + using IndexPositions = PODArrayWithStackMemory; + + static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number) + { + res = 0; + for (UInt64 pos = l; pos < r; ++pos) + { + if (!isNumericASCII(description[pos])) + throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS); + res = res * 10 + description[pos] - '0'; + if (res >= argument_number) + throw Exception( + "Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS); + } + } + + static inline void init( + const String & pattern, + size_t argument_number, + const std::vector> & constant_strings, + IndexPositions & index_positions, + std::vector & substrings) + { + /// Is current position after open curly brace. + bool is_open_curly = false; + /// The position of last open token. + size_t last_open = -1; + + /// Is formatting in a plain {} token. + std::optional is_plain_numbering; + UInt64 index_if_plain = 0; + + /// Left position of adding substrings, just to the closed brace position or the start of the string. + /// Invariant --- the start of substring is in this position. + size_t start_pos = 0; + + /// A flag to decide whether we should glue the constant strings. + bool glue_to_next = false; + + /// Handling double braces (escaping). + auto double_brace_removal = [](String & str) + { + size_t i = 0; + bool should_delete = true; + str.erase( + std::remove_if( + str.begin(), + str.end(), + [&i, &should_delete, &str](char) + { + bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}'); + ++i; + if (is_double_brace && should_delete) + { + should_delete = false; + return true; + } + should_delete = true; + return false; + }), + str.end()); + }; + + index_positions.emplace_back(); + + for (size_t i = 0; i < pattern.size(); ++i) + { + if (pattern[i] == '{') + { + /// Escaping handling + /// It is safe to access because of null termination + if (pattern[i + 1] == '{') + { + ++i; + continue; + } + + if (is_open_curly) + throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS); + + String to_add = String(pattern.data() + start_pos, i - start_pos); + double_brace_removal(to_add); + if (!glue_to_next) + substrings.emplace_back(to_add); + else + substrings.back() += to_add; + + glue_to_next = false; + + is_open_curly = true; + last_open = i + 1; + } + else if (pattern[i] == '}') + { + if (pattern[i + 1] == '}') + { + ++i; + continue; + } + + if (!is_open_curly) + throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS); + + is_open_curly = false; + + if (last_open == i) + { + if (is_plain_numbering && !*is_plain_numbering) + throw Exception( + "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS); + is_plain_numbering = true; + if (index_if_plain >= argument_number) + throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS); + index_positions.back() = index_if_plain++; + } + else + { + if (is_plain_numbering && *is_plain_numbering) + throw Exception( + "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS); + is_plain_numbering = false; + + UInt64 arg; + parseNumber(pattern, last_open, i, arg, argument_number); + + if (arg >= argument_number) + throw Exception( + "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS); + + index_positions.back() = arg; + } + + if (!constant_strings.empty() && constant_strings[index_positions.back()]) + { + /// The next string should be glued to last `A {} C`.format('B') -> `A B C`. + glue_to_next = true; + substrings.back() += *constant_strings[index_positions.back()]; + } + else + index_positions.emplace_back(); /// Otherwise we commit arg number and proceed. + + start_pos = i + 1; + } + } + + if (is_open_curly) + throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS); + + String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos); + double_brace_removal(to_add); + + if (!glue_to_next) + substrings.emplace_back(to_add); + else + substrings.back() += to_add; + + index_positions.pop_back(); + } +} + +} diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index cd83223de3e..e11071265ce 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -52,23 +52,21 @@ public: { if (arguments.size() < 2) throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at least 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (arguments.size() > FormatImpl::argument_threshold) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at most " + std::to_string(FormatImpl::argument_threshold), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2", + getName(), + arguments.size()); for (const auto arg_idx : collections::range(0, arguments.size())) { const auto * arg = arguments[arg_idx].get(); if (!isStringOrFixedString(arg)) - throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " - + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}", + arg->getName(), + arg_idx + 1, + getName()); } return std::make_shared(); @@ -125,7 +123,7 @@ private: std::vector data(num_arguments); std::vector offsets(num_arguments); std::vector fixed_string_sizes(num_arguments); - std::vector constant_strings(num_arguments); + std::vector> constant_strings(num_arguments); bool has_column_string = false; bool has_column_fixed_string = false; for (size_t i = 0; i < num_arguments; ++i) diff --git a/src/Functions/formatString.cpp b/src/Functions/formatString.cpp index e138c072639..ee90a082e5b 100644 --- a/src/Functions/formatString.cpp +++ b/src/Functions/formatString.cpp @@ -45,25 +45,23 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.empty()) + if (arguments.size() < 2) throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at least 1", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (arguments.size() > FormatImpl::argument_threshold) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at most " + std::to_string(FormatImpl::argument_threshold), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2", + getName(), + arguments.size()); for (const auto arg_idx : collections::range(0, arguments.size())) { const auto * arg = arguments[arg_idx].get(); if (!isStringOrFixedString(arg)) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}", + arg->getName(), + arg_idx + 1, + getName()); } return std::make_shared(); @@ -84,7 +82,7 @@ public: std::vector data(arguments.size() - 1); std::vector offsets(arguments.size() - 1); std::vector fixed_string_sizes(arguments.size() - 1); - std::vector constant_strings(arguments.size() - 1); + std::vector> constant_strings(arguments.size() - 1); bool has_column_string = false; bool has_column_fixed_string = false; diff --git a/src/Functions/formatString.h b/src/Functions/formatString.h index 419ecf1c773..3c5057ecb97 100644 --- a/src/Functions/formatString.h +++ b/src/Functions/formatString.h @@ -4,8 +4,10 @@ #include #include #include +#include #include + #include #include #include @@ -22,8 +24,6 @@ namespace ErrorCodes struct FormatImpl { - static constexpr size_t small_argument_threshold = 1024; - static constexpr size_t argument_threshold = std::numeric_limits::max(); static constexpr size_t right_padding = 15; template @@ -39,165 +39,10 @@ struct FormatImpl format(std::forward(args)...); } - static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res) - { - res = 0; - for (UInt64 pos = l; pos < r; ++pos) - { - if (!isNumericASCII(description[pos])) - throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS); - res = res * 10 + description[pos] - '0'; - if (res >= argument_threshold) - throw Exception( - "Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS); - } - } - - static inline void init( - const String & pattern, - const std::vector & data, - size_t argument_number, - const std::vector & constant_strings, - UInt64 * index_positions_ptr, - std::vector & substrings) - { - /// Is current position after open curly brace. - bool is_open_curly = false; - /// The position of last open token. - size_t last_open = -1; - - /// Is formatting in a plain {} token. - std::optional is_plain_numbering; - UInt64 index_if_plain = 0; - - /// Left position of adding substrings, just to the closed brace position or the start of the string. - /// Invariant --- the start of substring is in this position. - size_t start_pos = 0; - - /// A flag to decide whether we should glue the constant strings. - bool glue_to_next = false; - - /// Handling double braces (escaping). - auto double_brace_removal = [](String & str) - { - size_t i = 0; - bool should_delete = true; - str.erase( - std::remove_if( - str.begin(), - str.end(), - [&i, &should_delete, &str](char) - { - bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}'); - ++i; - if (is_double_brace && should_delete) - { - should_delete = false; - return true; - } - should_delete = true; - return false; - }), - str.end()); - }; - - for (size_t i = 0; i < pattern.size(); ++i) - { - if (pattern[i] == '{') - { - /// Escaping handling - /// It is safe to access because of null termination - if (pattern[i + 1] == '{') - { - ++i; - continue; - } - - if (is_open_curly) - throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS); - - String to_add = String(pattern.data() + start_pos, i - start_pos); - double_brace_removal(to_add); - if (!glue_to_next) - substrings.emplace_back(to_add); - else - substrings.back() += to_add; - - glue_to_next = false; - - is_open_curly = true; - last_open = i + 1; - } - else if (pattern[i] == '}') - { - if (pattern[i + 1] == '}') - { - ++i; - continue; - } - - if (!is_open_curly) - throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS); - - is_open_curly = false; - - if (last_open == i) - { - if (is_plain_numbering && !*is_plain_numbering) - throw Exception( - "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS); - is_plain_numbering = true; - if (index_if_plain >= argument_number) - throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS); - *index_positions_ptr = index_if_plain++; - } - else - { - if (is_plain_numbering && *is_plain_numbering) - throw Exception( - "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS); - is_plain_numbering = false; - - UInt64 arg; - parseNumber(pattern, last_open, i, arg); - - if (arg >= argument_number) - throw Exception( - "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS); - - *index_positions_ptr = arg; - } - - /// Constant string. - if (!data[*index_positions_ptr]) - { - /// The next string should be glued to last `A {} C`.format('B') -> `A B C`. - glue_to_next = true; - substrings.back() += constant_strings[*index_positions_ptr]; - } - else - ++index_positions_ptr; /// Otherwise we commit arg number and proceed. - - start_pos = i + 1; - } - } - - if (is_open_curly) - throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS); - - String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos); - double_brace_removal(to_add); - - if (!glue_to_next) - substrings.emplace_back(to_add); - else - substrings.back() += to_add; - } - /// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string. /// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString. /// fixed_string_N for savings N to fixed strings. - /// constant_strings for constant strings. If data[i] is nullptr, than it is constant string. + /// constant_strings for constant strings. If data[i] is nullptr, it is constant string. /// res_data is result_data, res_offsets is offset result. /// input_rows_count is the number of rows processed. /// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size(). @@ -207,29 +52,22 @@ struct FormatImpl const std::vector & data, const std::vector & offsets, [[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector & fixed_string_N, - const std::vector & constant_strings, + const std::vector> & constant_strings, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, size_t input_rows_count) { const size_t argument_number = offsets.size(); - UInt64 small_index_positions_buffer[small_argument_threshold]; - /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices. - UInt64 * index_positions = small_index_positions_buffer; - - std::unique_ptr big_index_positions_buffer; - if (argument_number > small_argument_threshold) - { - big_index_positions_buffer.reset(new UInt64[argument_number]); - index_positions = big_index_positions_buffer.get(); - } + /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this + /// array will be filled with [1, 3, 1, 0] but without constant string indices. + Format::IndexPositions index_positions; /// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation. /// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``]. std::vector substrings; - init(pattern, data, argument_number, constant_strings, index_positions, substrings); + Format::init(pattern, argument_number, constant_strings, index_positions, substrings); UInt64 final_size = 0; @@ -271,7 +109,7 @@ struct FormatImpl for (size_t j = 1; j < substrings.size(); ++j) { UInt64 arg = index_positions[j - 1]; - auto offset_ptr = offsets[arg]; + const auto * offset_ptr = offsets[arg]; UInt64 arg_offset = 0; UInt64 size = 0; diff --git a/tests/queries/0_stateless/02245_format_string_stack_overflow.reference b/tests/queries/0_stateless/02245_format_string_stack_overflow.reference new file mode 100644 index 00000000000..6e163a64914 --- /dev/null +++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.reference @@ -0,0 +1 @@ +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02245_format_string_stack_overflow.sql b/tests/queries/0_stateless/02245_format_string_stack_overflow.sql new file mode 100644 index 00000000000..1ee3606d3a6 --- /dev/null +++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.sql @@ -0,0 +1 @@ +select format('{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}{0}', toString(number)) str from numbers(1); From 6003a49a877908ac1d9f4651b7d507e6055e9a9b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 28 Mar 2022 17:48:47 +0800 Subject: [PATCH 02/12] Fix style --- src/Common/format.h | 2 ++ src/Functions/formatString.h | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Common/format.h b/src/Common/format.h index 234ff90061c..a9382f247ab 100644 --- a/src/Common/format.h +++ b/src/Common/format.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include diff --git a/src/Functions/formatString.h b/src/Functions/formatString.h index 3c5057ecb97..44fbdac9378 100644 --- a/src/Functions/formatString.h +++ b/src/Functions/formatString.h @@ -17,10 +17,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} struct FormatImpl { From d0f01516db0c2d403f9d45898251bd417e78ad54 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 15:48:57 +0200 Subject: [PATCH 03/12] Resurrect automatic labelling --- .github/workflows/master.yml | 10 - tests/ci/cherry_pick_utils/parser.py | 8 - tests/ci/commit_status_helper.py | 5 + tests/ci/run_check.py | 34 +- utils/github/__init__.py | 1 - utils/github/backport.py | 185 ---------- utils/github/cherrypick.py | 323 ------------------ utils/github/local.py | 108 ------ utils/github/parser.py | 64 ---- utils/github/query.py | 492 --------------------------- 10 files changed, 31 insertions(+), 1199 deletions(-) delete mode 100644 utils/github/__init__.py delete mode 100644 utils/github/backport.py delete mode 100644 utils/github/cherrypick.py delete mode 100644 utils/github/local.py delete mode 100644 utils/github/parser.py delete mode 100644 utils/github/query.py diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 73cffc1d5d3..cfa95b84ee5 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -149,7 +149,6 @@ jobs: sudo rm -fr "$TEMP_PATH" SplitBuildSmokeTest: needs: [BuilderDebSplitted] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, style-checker] steps: - name: Set envs @@ -316,7 +315,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -362,7 +360,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinGCC: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -636,7 +633,6 @@ jobs: ########################################################################################## BuilderDebSplitted: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -682,7 +678,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinTidy: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -728,7 +723,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwin: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -774,7 +768,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -820,7 +813,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinFreeBSD: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -866,7 +858,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinDarwinAarch64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs @@ -912,7 +903,6 @@ jobs: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinPPC64: needs: [DockerHubPush] - if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} runs-on: [self-hosted, builder] steps: - name: Set envs diff --git a/tests/ci/cherry_pick_utils/parser.py b/tests/ci/cherry_pick_utils/parser.py index d8348e6d964..29c05e5328f 100644 --- a/tests/ci/cherry_pick_utils/parser.py +++ b/tests/ci/cherry_pick_utils/parser.py @@ -20,8 +20,6 @@ class Description: def __init__(self, pull_request): self.label_name = str() - self.legal = False - self._parse(pull_request["bodyText"]) def _parse(self, text): @@ -39,12 +37,6 @@ class Description: category = stripped next_category = False - if ( - stripped - == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en" - ): - self.legal = True - category_headers = ( "Category (leave one):", "Changelog category (leave one):", diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index e379c9a2254..53dfb145b44 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -59,3 +59,8 @@ def post_commit_status_to_file(file_path, description, state, report_url): with open(file_path, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow([state, report_url, description]) + +def post_label(gh, pr_info, label_name): + repo = gh.get_repo(GITHUB_REPOSITORY) + pull_request = repo.get_pull(pr_info.number) + pull_request.add_to_labels(label_name) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 9c7ba13f8e4..74e60582e8d 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -8,7 +8,7 @@ from github import Github from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token -from commit_status_helper import get_commit +from commit_status_helper import get_commit, post_label NAME = "Run Check (actions)" @@ -81,6 +81,21 @@ TRUSTED_CONTRIBUTORS = { ] } +MAP_CATEGORY_TO_LABEL = { + "New Feature": "pr-feature", + "Bug Fix": "pr-bugfix", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix", + "Improvement": "pr-improvement", + "Performance Improvement": "pr-performance", + "Backward Incompatible Change": "pr-backward-incompatible", + "Build/Testing/Packaging Improvement": "pr-build", + "Not for changelog (changelog entry is not required)": "pr-not-for-changelog", + "Not for changelog": "pr-not-for-changelog", + "Documentation (changelog entry is not required)": "pr-documentation", + "Documentation": "pr-documentation", + # 'Other': doesn't match anything +} + def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: @@ -168,7 +183,7 @@ def check_pr_description(pr_info): + second_category + "'" ) - return result_status[:140] + return result_status[:140], category elif re.match( r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] @@ -190,19 +205,19 @@ def check_pr_description(pr_info): i += 1 if not category: - return "Changelog category is empty" + return "Changelog category is empty", category # Filter out the PR categories that are not for changelog. if re.match( r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", category, ): - return "" + return "", category if not entry: - return f"Changelog entry required for category '{category}'" + return f"Changelog entry required for category '{category}'", category - return "" + return "", category if __name__ == "__main__": @@ -213,7 +228,10 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - description_report = check_pr_description(pr_info)[:139] + description_report, category = check_pr_description(pr_info) + if category in MAP_CATEGORY_TO_LABEL: + post_label(gh, pr_info, MAP_CATEGORY_TO_LABEL[category]) + if description_report: print("::notice ::Cannot run, description does not match the template") logging.info( @@ -225,7 +243,7 @@ if __name__ == "__main__": ) commit.create_status( context=NAME, - description=description_report, + description=description_report[:139], state="failure", target_url=url, ) diff --git a/utils/github/__init__.py b/utils/github/__init__.py deleted file mode 100644 index 40a96afc6ff..00000000000 --- a/utils/github/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/utils/github/backport.py b/utils/github/backport.py deleted file mode 100644 index 615c0d19ffa..00000000000 --- a/utils/github/backport.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- - -try: - from clickhouse.utils.github.cherrypick import CherryPick - from clickhouse.utils.github.query import Query as RemoteRepo - from clickhouse.utils.github.local import Repository as LocalRepo -except: - from .cherrypick import CherryPick - from .query import Query as RemoteRepo - from .local import Repository as LocalRepo - -import argparse -import logging -import re -import sys - - -class Backport: - def __init__(self, token, owner, name, team): - self._gh = RemoteRepo( - token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7 - ) - self._token = token - self.default_branch_name = self._gh.default_branch - self.ssh_url = self._gh.ssh_url - - def getPullRequests(self, from_commit): - return self._gh.get_pull_requests(from_commit) - - def getBranchesWithRelease(self): - branches = set() - for pull_request in self._gh.find_pull_requests("release"): - branches.add(pull_request["headRefName"]) - return branches - - def execute(self, repo, upstream, until_commit, run_cherrypick): - repo = LocalRepo(repo, upstream, self.default_branch_name) - all_branches = repo.get_release_branches() # [(branch_name, base_commit)] - - release_branches = self.getBranchesWithRelease() - - branches = [] - # iterate over all branches to preserve their precedence. - for branch in all_branches: - if branch[0] in release_branches: - branches.append(branch) - - if not branches: - logging.info("No release branches found!") - return - - for branch in branches: - logging.info("Found release branch: %s", branch[0]) - - if not until_commit: - until_commit = branches[0][1] - pull_requests = self.getPullRequests(until_commit) - - backport_map = {} - - RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$") - RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$") - RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$") - - # pull-requests are sorted by ancestry from the most recent. - for pr in pull_requests: - while repo.comparator(branches[-1][1]) >= repo.comparator( - pr["mergeCommit"]["oid"] - ): - logging.info( - "PR #{} is already inside {}. Dropping this branch for further PRs".format( - pr["number"], branches[-1][0] - ) - ) - branches.pop() - - logging.info("Processing PR #{}".format(pr["number"])) - - assert len(branches) - - branch_set = set([branch[0] for branch in branches]) - - # First pass. Find all must-backports - for label in pr["labels"]["nodes"]: - if label["name"] == "pr-must-backport": - backport_map[pr["number"]] = branch_set.copy() - continue - matched = RE_MUST_BACKPORT.match(label["name"]) - if matched: - if pr["number"] not in backport_map: - backport_map[pr["number"]] = set() - backport_map[pr["number"]].add(matched.group(1)) - - # Second pass. Find all no-backports - for label in pr["labels"]["nodes"]: - if label["name"] == "pr-no-backport" and pr["number"] in backport_map: - del backport_map[pr["number"]] - break - matched_no_backport = RE_NO_BACKPORT.match(label["name"]) - matched_backported = RE_BACKPORTED.match(label["name"]) - if ( - matched_no_backport - and pr["number"] in backport_map - and matched_no_backport.group(1) in backport_map[pr["number"]] - ): - backport_map[pr["number"]].remove(matched_no_backport.group(1)) - logging.info( - "\tskipping %s because of forced no-backport", - matched_no_backport.group(1), - ) - elif ( - matched_backported - and pr["number"] in backport_map - and matched_backported.group(1) in backport_map[pr["number"]] - ): - backport_map[pr["number"]].remove(matched_backported.group(1)) - logging.info( - "\tskipping %s because it's already backported manually", - matched_backported.group(1), - ) - - for pr, branches in list(backport_map.items()): - logging.info("PR #%s needs to be backported to:", pr) - for branch in branches: - logging.info( - "\t%s, and the status is: %s", - branch, - run_cherrypick(self._token, pr, branch), - ) - - # print API costs - logging.info("\nGitHub API total costs per query:") - for name, value in list(self._gh.api_costs.items()): - logging.info("%s : %s", name, value) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--token", type=str, required=True, help="token for Github access" - ) - parser.add_argument( - "--repo", - type=str, - required=True, - help="path to full repository", - metavar="PATH", - ) - parser.add_argument( - "--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT" - ) - parser.add_argument( - "--dry-run", - action="store_true", - help="do not create or merge any PRs", - default=False, - ) - parser.add_argument( - "--verbose", - "-v", - action="store_true", - help="more verbose output", - default=False, - ) - parser.add_argument( - "--upstream", - "-u", - type=str, - help="remote name of upstream in repository", - default="origin", - ) - args = parser.parse_args() - - if args.verbose: - logging.basicConfig( - format="%(message)s", stream=sys.stdout, level=logging.DEBUG - ) - else: - logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO) - - cherrypick_run = lambda token, pr, branch: CherryPick( - token, "ClickHouse", "ClickHouse", "core", pr, branch - ).execute(args.repo, args.dry_run) - bp = Backport(args.token, "ClickHouse", "ClickHouse", "core") - bp.execute(args.repo, args.upstream, args.til, cherrypick_run) diff --git a/utils/github/cherrypick.py b/utils/github/cherrypick.py deleted file mode 100644 index c6469fa62a9..00000000000 --- a/utils/github/cherrypick.py +++ /dev/null @@ -1,323 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Backports changes from PR to release branch. -Requires multiple separate runs as part of the implementation. - -First run should do the following: -1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr}) -2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit}) -3. Create PR from temporary branch to backport branch (emulating cherry-pick). - -Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it. - -Third run creates PR from backport branch (with merged previous PR) to release branch. -""" - -try: - from clickhouse.utils.github.query import Query as RemoteRepo -except: - from .query import Query as RemoteRepo - -import argparse -from enum import Enum -import logging -import os -import subprocess -import sys - - -class CherryPick: - class Status(Enum): - DISCARDED = "discarded" - NOT_INITIATED = "not started" - FIRST_MERGEABLE = "waiting for 1st stage" - FIRST_CONFLICTS = "conflicts on 1st stage" - SECOND_MERGEABLE = "waiting for 2nd stage" - SECOND_CONFLICTS = "conflicts on 2nd stage" - MERGED = "backported" - - def _run(self, args): - out = subprocess.check_output(args).rstrip() - logging.debug(out) - return out - - def __init__(self, token, owner, name, team, pr_number, target_branch): - self._gh = RemoteRepo(token, owner=owner, name=name, team=team) - self._pr = self._gh.get_pull_request(pr_number) - - self.ssh_url = self._gh.ssh_url - - # TODO: check if pull-request is merged. - - self.merge_commit_oid = self._pr["mergeCommit"]["oid"] - - self.target_branch = target_branch - self.backport_branch = "backport/{branch}/{pr}".format( - branch=target_branch, pr=pr_number - ) - self.cherrypick_branch = "cherrypick/{branch}/{oid}".format( - branch=target_branch, oid=self.merge_commit_oid - ) - - def getCherryPickPullRequest(self): - return self._gh.find_pull_request( - base=self.backport_branch, head=self.cherrypick_branch - ) - - def createCherryPickPullRequest(self, repo_path): - DESCRIPTION = ( - "This pull-request is a first step of an automated backporting.\n" - "It contains changes like after calling a local command `git cherry-pick`.\n" - "If you intend to continue backporting this changes, then resolve all conflicts if any.\n" - "Otherwise, if you do not want to backport them, then just close this pull-request.\n" - "\n" - "The check results does not matter at this step - you can safely ignore them.\n" - "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n" - ) - - # FIXME: replace with something better than os.system() - git_prefix = [ - "git", - "-C", - repo_path, - "-c", - "user.email=robot-clickhouse@yandex-team.ru", - "-c", - "user.name=robot-clickhouse", - ] - base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"] - - # Create separate branch for backporting, and make it look like real cherry-pick. - self._run(git_prefix + ["checkout", "-f", self.target_branch]) - self._run(git_prefix + ["checkout", "-B", self.backport_branch]) - self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid]) - - # Create secondary branch to allow pull request with cherry-picked commit. - self._run( - git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid] - ) - - self._run( - git_prefix - + [ - "push", - "-f", - "origin", - "{branch}:{branch}".format(branch=self.backport_branch), - ] - ) - self._run( - git_prefix - + [ - "push", - "-f", - "origin", - "{branch}:{branch}".format(branch=self.cherrypick_branch), - ] - ) - - # Create pull-request like a local cherry-pick - pr = self._gh.create_pull_request( - source=self.cherrypick_branch, - target=self.backport_branch, - title="Cherry pick #{number} to {target}: {title}".format( - number=self._pr["number"], - target=self.target_branch, - title=self._pr["title"].replace('"', '\\"'), - ), - description="Original pull-request #{}\n\n{}".format( - self._pr["number"], DESCRIPTION - ), - ) - - # FIXME: use `team` to leave a single eligible assignee. - self._gh.add_assignee(pr, self._pr["author"]) - self._gh.add_assignee(pr, self._pr["mergedBy"]) - - self._gh.set_label(pr, "do not test") - self._gh.set_label(pr, "pr-cherrypick") - - return pr - - def mergeCherryPickPullRequest(self, cherrypick_pr): - return self._gh.merge_pull_request(cherrypick_pr["id"]) - - def getBackportPullRequest(self): - return self._gh.find_pull_request( - base=self.target_branch, head=self.backport_branch - ) - - def createBackportPullRequest(self, cherrypick_pr, repo_path): - DESCRIPTION = ( - "This pull-request is a last step of an automated backporting.\n" - "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n" - "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n" - ) - - git_prefix = [ - "git", - "-C", - repo_path, - "-c", - "user.email=robot-clickhouse@clickhouse.com", - "-c", - "user.name=robot-clickhouse", - ] - - pr_title = "Backport #{number} to {target}: {title}".format( - number=self._pr["number"], - target=self.target_branch, - title=self._pr["title"].replace('"', '\\"'), - ) - - self._run(git_prefix + ["checkout", "-f", self.backport_branch]) - self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch]) - self._run( - git_prefix - + [ - "reset", - "--soft", - self._run( - git_prefix - + [ - "merge-base", - "origin/" + self.target_branch, - self.backport_branch, - ] - ), - ] - ) - self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title]) - self._run( - git_prefix - + [ - "push", - "-f", - "origin", - "{branch}:{branch}".format(branch=self.backport_branch), - ] - ) - - pr = self._gh.create_pull_request( - source=self.backport_branch, - target=self.target_branch, - title=pr_title, - description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format( - self._pr["number"], cherrypick_pr["number"], DESCRIPTION - ), - ) - - # FIXME: use `team` to leave a single eligible assignee. - self._gh.add_assignee(pr, self._pr["author"]) - self._gh.add_assignee(pr, self._pr["mergedBy"]) - - self._gh.set_label(pr, "pr-backport") - - return pr - - def execute(self, repo_path, dry_run=False): - pr1 = self.getCherryPickPullRequest() - if not pr1: - if not dry_run: - pr1 = self.createCherryPickPullRequest(repo_path) - logging.debug( - "Created PR with cherry-pick of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr1["url"], - ) - else: - return CherryPick.Status.NOT_INITIATED - else: - logging.debug( - "Found PR with cherry-pick of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr1["url"], - ) - - if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]: - if not dry_run: - pr1 = self.mergeCherryPickPullRequest(pr1) - logging.debug( - "Merged PR with cherry-pick of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr1["url"], - ) - - if not pr1["merged"]: - logging.debug( - "Waiting for PR with cherry-pick of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr1["url"], - ) - - if pr1["closed"]: - return CherryPick.Status.DISCARDED - elif pr1["mergeable"] == "CONFLICTING": - return CherryPick.Status.FIRST_CONFLICTS - else: - return CherryPick.Status.FIRST_MERGEABLE - - pr2 = self.getBackportPullRequest() - if not pr2: - if not dry_run: - pr2 = self.createBackportPullRequest(pr1, repo_path) - logging.debug( - "Created PR with backport of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr2["url"], - ) - else: - return CherryPick.Status.FIRST_MERGEABLE - else: - logging.debug( - "Found PR with backport of %s to %s: %s", - self._pr["number"], - self.target_branch, - pr2["url"], - ) - - if pr2["merged"]: - return CherryPick.Status.MERGED - elif pr2["closed"]: - return CherryPick.Status.DISCARDED - elif pr2["mergeable"] == "CONFLICTING": - return CherryPick.Status.SECOND_CONFLICTS - else: - return CherryPick.Status.SECOND_MERGEABLE - - -if __name__ == "__main__": - logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG) - - parser = argparse.ArgumentParser() - parser.add_argument( - "--token", "-t", type=str, required=True, help="token for Github access" - ) - parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick") - parser.add_argument( - "--branch", - "-b", - type=str, - required=True, - help="target branch name for cherry-pick", - ) - parser.add_argument( - "--repo", - "-r", - type=str, - required=True, - help="path to full repository", - metavar="PATH", - ) - args = parser.parse_args() - - cp = CherryPick( - args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch - ) - cp.execute(args.repo) diff --git a/utils/github/local.py b/utils/github/local.py deleted file mode 100644 index 571c9102ba0..00000000000 --- a/utils/github/local.py +++ /dev/null @@ -1,108 +0,0 @@ -# -*- coding: utf-8 -*- - -import functools -import logging -import os -import re - - -class RepositoryBase: - def __init__(self, repo_path): - import git - - self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path)) - - # comparator of commits - def cmp(x, y): - if str(x) == str(y): - return 0 - if self._repo.is_ancestor(x, y): - return -1 - else: - return 1 - - self.comparator = functools.cmp_to_key(cmp) - - def get_head_commit(self): - return self._repo.commit(self._default) - - def iterate(self, begin, end): - rev_range = "{}...{}".format(begin, end) - for commit in self._repo.iter_commits(rev_range, first_parent=True): - yield commit - - -class Repository(RepositoryBase): - def __init__(self, repo_path, remote_name, default_branch_name): - super(Repository, self).__init__(repo_path) - self._remote = self._repo.remotes[remote_name] - self._remote.fetch() - self._default = self._remote.refs[default_branch_name] - - def get_release_branches(self): - """ - Returns sorted list of tuples: - * remote branch (git.refs.remote.RemoteReference), - * base commit (git.Commit), - * head (git.Commit)). - List is sorted by commits in ascending order. - """ - release_branches = [] - - RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$") - - for branch in [ - r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path) - ]: - base = self._repo.merge_base(self._default, self._repo.commit(branch)) - if not base: - logging.info( - "Branch %s is not based on branch %s. Ignoring.", - branch.path, - self._default, - ) - elif len(base) > 1: - logging.info( - "Branch %s has more than one base commit. Ignoring.", branch.path - ) - else: - release_branches.append((os.path.basename(branch.name), base[0])) - - return sorted(release_branches, key=lambda x: self.comparator(x[1])) - - -class BareRepository(RepositoryBase): - def __init__(self, repo_path, default_branch_name): - super(BareRepository, self).__init__(repo_path) - self._default = self._repo.branches[default_branch_name] - - def get_release_branches(self): - """ - Returns sorted list of tuples: - * branch (git.refs.head?), - * base commit (git.Commit), - * head (git.Commit)). - List is sorted by commits in ascending order. - """ - release_branches = [] - - RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$") - - for branch in [ - r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path) - ]: - base = self._repo.merge_base(self._default, self._repo.commit(branch)) - if not base: - logging.info( - "Branch %s is not based on branch %s. Ignoring.", - branch.path, - self._default, - ) - elif len(base) > 1: - logging.info( - "Branch %s has more than one base commit. Ignoring.", branch.path - ) - else: - release_branches.append((os.path.basename(branch.name), base[0])) - - return sorted(release_branches, key=lambda x: self.comparator(x[1])) diff --git a/utils/github/parser.py b/utils/github/parser.py deleted file mode 100644 index d8348e6d964..00000000000 --- a/utils/github/parser.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- - - -class Description: - """Parsed description representation""" - - MAP_CATEGORY_TO_LABEL = { - "New Feature": "pr-feature", - "Bug Fix": "pr-bugfix", - "Improvement": "pr-improvement", - "Performance Improvement": "pr-performance", - # 'Backward Incompatible Change': doesn't match anything - "Build/Testing/Packaging Improvement": "pr-build", - "Non-significant (changelog entry is not needed)": "pr-non-significant", - "Non-significant (changelog entry is not required)": "pr-non-significant", - "Non-significant": "pr-non-significant", - "Documentation (changelog entry is not required)": "pr-documentation", - # 'Other': doesn't match anything - } - - def __init__(self, pull_request): - self.label_name = str() - self.legal = False - - self._parse(pull_request["bodyText"]) - - def _parse(self, text): - lines = text.splitlines() - next_category = False - category = str() - - for line in lines: - stripped = line.strip() - - if not stripped: - continue - - if next_category: - category = stripped - next_category = False - - if ( - stripped - == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en" - ): - self.legal = True - - category_headers = ( - "Category (leave one):", - "Changelog category (leave one):", - "Changelog category:", - "Category:", - ) - - if stripped in category_headers: - next_category = True - - if category in Description.MAP_CATEGORY_TO_LABEL: - self.label_name = Description.MAP_CATEGORY_TO_LABEL[category] - else: - if not category: - print("Cannot find category in pr description") - else: - print(("Unknown category: " + category)) diff --git a/utils/github/query.py b/utils/github/query.py deleted file mode 100644 index 7afbc57781c..00000000000 --- a/utils/github/query.py +++ /dev/null @@ -1,492 +0,0 @@ -# -*- coding: utf-8 -*- - -import requests - - -class Query: - """ - Implements queries to the Github API using GraphQL - """ - - _PULL_REQUEST = """ - author {{ - ... on User {{ - id - login - }} - }} - - baseRepository {{ - nameWithOwner - }} - - mergeCommit {{ - oid - parents(first: {min_page_size}) {{ - totalCount - nodes {{ - oid - }} - }} - }} - - mergedBy {{ - ... on User {{ - id - login - }} - }} - - baseRefName - closed - headRefName - id - mergeable - merged - number - title - url - """ - - def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10): - self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size) - - self._token = token - self._owner = owner - self._name = name - self._team = team - - self._max_page_size = max_page_size - self._min_page_size = min_page_size - - self.api_costs = {} - - repo = self.get_repository() - self._id = repo["id"] - self.ssh_url = repo["sshUrl"] - self.default_branch = repo["defaultBranchRef"]["name"] - - self.members = set(self.get_members()) - - def get_repository(self): - _QUERY = """ - repository(owner: "{owner}" name: "{name}") {{ - defaultBranchRef {{ - name - }} - id - sshUrl - }} - """ - - query = _QUERY.format(owner=self._owner, name=self._name) - return self._run(query)["repository"] - - def get_members(self): - """Get all team members for organization - - Returns: - members: a map of members' logins to ids - """ - - _QUERY = """ - organization(login: "{organization}") {{ - team(slug: "{team}") {{ - members(first: {max_page_size} {next}) {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - id - login - }} - }} - }} - }} - """ - - members = {} - not_end = True - query = _QUERY.format( - organization=self._owner, - team=self._team, - max_page_size=self._max_page_size, - next="", - ) - - while not_end: - result = self._run(query)["organization"]["team"] - if result is None: - break - result = result["members"] - not_end = result["pageInfo"]["hasNextPage"] - query = _QUERY.format( - organization=self._owner, - team=self._team, - max_page_size=self._max_page_size, - next='after: "{}"'.format(result["pageInfo"]["endCursor"]), - ) - - members += dict([(node["login"], node["id"]) for node in result["nodes"]]) - - return members - - def get_pull_request(self, number): - _QUERY = """ - repository(owner: "{owner}" name: "{name}") {{ - pullRequest(number: {number}) {{ - {pull_request_data} - }} - }} - """ - - query = _QUERY.format( - owner=self._owner, - name=self._name, - number=number, - pull_request_data=self._PULL_REQUEST, - min_page_size=self._min_page_size, - ) - return self._run(query)["repository"]["pullRequest"] - - def find_pull_request(self, base, head): - _QUERY = """ - repository(owner: "{owner}" name: "{name}") {{ - pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{ - nodes {{ - {pull_request_data} - }} - totalCount - }} - }} - """ - - query = _QUERY.format( - owner=self._owner, - name=self._name, - base=base, - head=head, - pull_request_data=self._PULL_REQUEST, - min_page_size=self._min_page_size, - ) - result = self._run(query)["repository"]["pullRequests"] - if result["totalCount"] > 0: - return result["nodes"][0] - else: - return {} - - def find_pull_requests(self, label_name): - """ - Get all pull-requests filtered by label name - """ - _QUERY = """ - repository(owner: "{owner}" name: "{name}") {{ - pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{ - nodes {{ - {pull_request_data} - }} - }} - }} - """ - - query = _QUERY.format( - owner=self._owner, - name=self._name, - label_name=label_name, - pull_request_data=self._PULL_REQUEST, - min_page_size=self._min_page_size, - ) - return self._run(query)["repository"]["pullRequests"]["nodes"] - - def get_pull_requests(self, before_commit): - """ - Get all merged pull-requests from the HEAD of default branch to the last commit (excluding) - """ - - _QUERY = """ - repository(owner: "{owner}" name: "{name}") {{ - defaultBranchRef {{ - target {{ - ... on Commit {{ - history(first: {max_page_size} {next}) {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - oid - associatedPullRequests(first: {min_page_size}) {{ - totalCount - nodes {{ - ... on PullRequest {{ - {pull_request_data} - - labels(first: {min_page_size}) {{ - totalCount - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - name - color - }} - }} - }} - }} - }} - }} - }} - }} - }} - }} - }} - """ - - pull_requests = [] - not_end = True - query = _QUERY.format( - owner=self._owner, - name=self._name, - max_page_size=self._max_page_size, - min_page_size=self._min_page_size, - pull_request_data=self._PULL_REQUEST, - next="", - ) - - while not_end: - result = self._run(query)["repository"]["defaultBranchRef"]["target"][ - "history" - ] - not_end = result["pageInfo"]["hasNextPage"] - query = _QUERY.format( - owner=self._owner, - name=self._name, - max_page_size=self._max_page_size, - min_page_size=self._min_page_size, - pull_request_data=self._PULL_REQUEST, - next='after: "{}"'.format(result["pageInfo"]["endCursor"]), - ) - - for commit in result["nodes"]: - # FIXME: maybe include `before_commit`? - if str(commit["oid"]) == str(before_commit): - not_end = False - break - - # TODO: fetch all pull-requests that were merged in a single commit. - assert ( - commit["associatedPullRequests"]["totalCount"] - <= self._min_page_size - ) - - for pull_request in commit["associatedPullRequests"]["nodes"]: - if ( - pull_request["baseRepository"]["nameWithOwner"] - == "{}/{}".format(self._owner, self._name) - and pull_request["baseRefName"] == self.default_branch - and pull_request["mergeCommit"]["oid"] == commit["oid"] - ): - pull_requests.append(pull_request) - - return pull_requests - - def create_pull_request( - self, source, target, title, description="", draft=False, can_modify=True - ): - _QUERY = """ - createPullRequest(input: {{ - baseRefName: "{target}", - headRefName: "{source}", - repositoryId: "{id}", - title: "{title}", - body: "{body}", - draft: {draft}, - maintainerCanModify: {modify} - }}) {{ - pullRequest {{ - {pull_request_data} - }} - }} - """ - - query = _QUERY.format( - target=target, - source=source, - id=self._id, - title=title, - body=description, - draft="true" if draft else "false", - modify="true" if can_modify else "false", - pull_request_data=self._PULL_REQUEST, - ) - return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"] - - def merge_pull_request(self, id): - _QUERY = """ - mergePullRequest(input: {{ - pullRequestId: "{id}" - }}) {{ - pullRequest {{ - {pull_request_data} - }} - }} - """ - - query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST) - return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"] - - # FIXME: figure out how to add more assignees at once - def add_assignee(self, pr, assignee): - _QUERY = """ - addAssigneesToAssignable(input: {{ - assignableId: "{id1}", - assigneeIds: "{id2}" - }}) {{ - clientMutationId - }} - """ - - query = _QUERY.format(id1=pr["id"], id2=assignee["id"]) - self._run(query, is_mutation=True) - - def set_label(self, pull_request, label_name): - """ - Set label by name to the pull request - - Args: - pull_request: JSON object returned by `get_pull_requests()` - label_name (string): label name - """ - - _GET_LABEL = """ - repository(owner: "{owner}" name: "{name}") {{ - labels(first: {max_page_size} {next} query: "{label_name}") {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - id - name - color - }} - }} - }} - """ - - _SET_LABEL = """ - addLabelsToLabelable(input: {{ - labelableId: "{pr_id}", - labelIds: "{label_id}" - }}) {{ - clientMutationId - }} - """ - - labels = [] - not_end = True - query = _GET_LABEL.format( - owner=self._owner, - name=self._name, - label_name=label_name, - max_page_size=self._max_page_size, - next="", - ) - - while not_end: - result = self._run(query)["repository"]["labels"] - not_end = result["pageInfo"]["hasNextPage"] - query = _GET_LABEL.format( - owner=self._owner, - name=self._name, - label_name=label_name, - max_page_size=self._max_page_size, - next='after: "{}"'.format(result["pageInfo"]["endCursor"]), - ) - - labels += [label for label in result["nodes"]] - - if not labels: - return - - query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"]) - self._run(query, is_mutation=True) - - def _run(self, query, is_mutation=False): - from requests.adapters import HTTPAdapter - from urllib3.util.retry import Retry - - def requests_retry_session( - retries=3, - backoff_factor=0.3, - status_forcelist=(500, 502, 504), - session=None, - ): - session = session or requests.Session() - retry = Retry( - total=retries, - read=retries, - connect=retries, - backoff_factor=backoff_factor, - status_forcelist=status_forcelist, - ) - adapter = HTTPAdapter(max_retries=retry) - session.mount("http://", adapter) - session.mount("https://", adapter) - return session - - headers = {"Authorization": "bearer {}".format(self._token)} - if is_mutation: - query = """ - mutation {{ - {query} - }} - """.format( - query=query - ) - else: - query = """ - query {{ - {query} - rateLimit {{ - cost - remaining - }} - }} - """.format( - query=query - ) - - while True: - request = requests_retry_session().post( - "https://api.github.com/graphql", json={"query": query}, headers=headers - ) - if request.status_code == 200: - result = request.json() - if "errors" in result: - raise Exception( - "Errors occurred: {}\nOriginal query: {}".format( - result["errors"], query - ) - ) - - if not is_mutation: - import inspect - - caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3] - if caller not in list(self.api_costs.keys()): - self.api_costs[caller] = 0 - self.api_costs[caller] += result["data"]["rateLimit"]["cost"] - - return result["data"] - else: - import json - - raise Exception( - "Query failed with code {code}:\n{json}".format( - code=request.status_code, - json=json.dumps(request.json(), indent=4), - ) - ) From bf591b971a6d9dbd1b63357ac353066d6348fc34 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 15:53:08 +0200 Subject: [PATCH 04/12] Moar categories --- tests/ci/run_check.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 74e60582e8d..608a708b04b 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -89,6 +89,10 @@ MAP_CATEGORY_TO_LABEL = { "Performance Improvement": "pr-performance", "Backward Incompatible Change": "pr-backward-incompatible", "Build/Testing/Packaging Improvement": "pr-build", + "Build Improvement": "pr-build", + "Build/Testing Improvement": "pr-build", + "Build": "pr-build", + "Packaging Improvement": "pr-build", "Not for changelog (changelog entry is not required)": "pr-not-for-changelog", "Not for changelog": "pr-not-for-changelog", "Documentation (changelog entry is not required)": "pr-documentation", From d59941e4f6ccd623b1f938460e6a712dd69a73d0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 19:15:25 +0200 Subject: [PATCH 05/12] Style --- tests/ci/commit_status_helper.py | 1 + tests/integration/test_allowed_url_from_config/test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 53dfb145b44..4c79d5ace07 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -60,6 +60,7 @@ def post_commit_status_to_file(file_path, description, state, report_url): out = csv.writer(f, delimiter="\t") out.writerow([state, report_url, description]) + def post_label(gh, pr_info, label_name): repo = gh.get_repo(GITHUB_REPOSITORY) pull_request = repo.get_pull(pr_info.number) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index 13f3929902d..01a2a500ebf 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -280,4 +280,4 @@ def test_HDFS(start_cluster): def test_schema_inference(start_cluster): error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')") - assert(error.find('ReadWriteBufferFromHTTPBase') == -1) + assert error.find("ReadWriteBufferFromHTTPBase") == -1 From 9220bedb7dc57a1f0ee2be1999610fad75ea674a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 19:28:18 +0200 Subject: [PATCH 06/12] Add submodule changed --- tests/ci/commit_status_helper.py | 5 +++-- tests/ci/pr_info.py | 9 +++++++++ tests/ci/run_check.py | 12 +++++++++--- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 4c79d5ace07..b4a3e49372f 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -61,7 +61,8 @@ def post_commit_status_to_file(file_path, description, state, report_url): out.writerow([state, report_url, description]) -def post_label(gh, pr_info, label_name): +def post_labels(gh, pr_info, labels_names): repo = gh.get_repo(GITHUB_REPOSITORY) pull_request = repo.get_pull(pr_info.number) - pull_request.add_to_labels(label_name) + for label in labels_names: + pull_request.add_to_labels(label) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ee4399792ae..0de4ec89124 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -236,6 +236,15 @@ class PRInfo: return True return False + def has_changes_in_submodules(self): + if self.changed_files is None or not self.changed_files: + return True + + for f in self.changed_files: + if "contrib" in f: + return True + return False + def can_skip_builds_and_use_version_from_master(self): # TODO: See a broken loop if "force tests" in self.labels: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 608a708b04b..f0100082c71 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -8,7 +8,7 @@ from github import Github from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token -from commit_status_helper import get_commit, post_label +from commit_status_helper import get_commit, post_labels NAME = "Run Check (actions)" @@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} CAN_BE_TESTED_LABEL = "can be tested" DO_NOT_TEST_LABEL = "do not test" FORCE_TESTS_LABEL = "force tests" +SUBMODULE_CHANGED_LABEL = "submodule changed" # Individual trusted contirbutors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in @@ -227,15 +228,20 @@ def check_pr_description(pr_info): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - pr_info = PRInfo(need_orgs=True, pr_event_from_api=True) + pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True) can_run, description, labels_state = should_run_checks_for_pr(pr_info) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) description_report, category = check_pr_description(pr_info) + pr_labels = [] if category in MAP_CATEGORY_TO_LABEL: - post_label(gh, pr_info, MAP_CATEGORY_TO_LABEL[category]) + pr_labels.append(MAP_CATEGORY_TO_LABEL[category]) + if pr_info.has_changes_in_submodules(): + pr_labels.append(SUBMODULE_CHANGED_LABEL) + + post_labels(gh, pr_info, pr_labels) if description_report: print("::notice ::Cannot run, description does not match the template") logging.info( From 999558f00fa01605c42eacd2f85243cf74291e4c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 19:28:45 +0200 Subject: [PATCH 07/12] Try fake update submodule --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 1707a7572aa..7c6c6961347 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d +Subproject commit 7c6c696134706eeef274787c9282e40416dc03e8 From 1a8b2c9637c53bef291c0ab33866f84ef437f2e4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 19:50:06 +0200 Subject: [PATCH 08/12] Remove submodule changed --- tests/ci/commit_status_helper.py | 7 +++++++ tests/ci/run_check.py | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index b4a3e49372f..a53ce6715d5 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -61,6 +61,13 @@ def post_commit_status_to_file(file_path, description, state, report_url): out.writerow([state, report_url, description]) +def remove_labels(gh, pr_info, labels_names): + repo = gh.get_repo(GITHUB_REPOSITORY) + pull_request = repo.get_pull(pr_info.number) + for label in labels_names: + pull_request.remove_from_labels(label) + + def post_labels(gh, pr_info, labels_names): repo = gh.get_repo(GITHUB_REPOSITORY) pull_request = repo.get_pull(pr_info.number) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index f0100082c71..0138808c6ae 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -8,7 +8,7 @@ from github import Github from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token -from commit_status_helper import get_commit, post_labels +from commit_status_helper import get_commit, post_labels, remove_labels NAME = "Run Check (actions)" @@ -235,13 +235,19 @@ if __name__ == "__main__": description_report, category = check_pr_description(pr_info) pr_labels = [] - if category in MAP_CATEGORY_TO_LABEL: + if ( + category in MAP_CATEGORY_TO_LABEL + and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels + ): pr_labels.append(MAP_CATEGORY_TO_LABEL[category]) if pr_info.has_changes_in_submodules(): pr_labels.append(SUBMODULE_CHANGED_LABEL) + elif SUBMODULE_CHANGED_LABEL in pr_info.labels: + remove_labels(gh, pr_info, [SUBMODULE_CHANGED_LABEL]) - post_labels(gh, pr_info, pr_labels) + if pr_labels: + post_labels(gh, pr_info, pr_labels) if description_report: print("::notice ::Cannot run, description does not match the template") logging.info( From 2e4ab305dd2a6951ad045b6cbdb920730710ddff Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Mar 2022 19:50:13 +0200 Subject: [PATCH 09/12] Revert "Try fake update submodule" This reverts commit 999558f00fa01605c42eacd2f85243cf74291e4c. --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 7c6c6961347..1707a7572aa 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 7c6c696134706eeef274787c9282e40416dc03e8 +Subproject commit 1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d From 68ec0d92c0c5446d7a32156ae2940e78bedb582f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Mar 2022 10:58:34 +0200 Subject: [PATCH 10/12] Remove if category changed --- tests/ci/run_check.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 0138808c6ae..5f3792f3432 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -234,20 +234,29 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) description_report, category = check_pr_description(pr_info) - pr_labels = [] + pr_labels_to_add = [] + pr_labels_to_remove = [] if ( category in MAP_CATEGORY_TO_LABEL and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels ): - pr_labels.append(MAP_CATEGORY_TO_LABEL[category]) + pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category]) + + for label in pr_info.labels: + if label in MAP_CATEGORY_TO_LABEL and label not in pr_labels_to_add: + pr_labels_to_remove.append(label) if pr_info.has_changes_in_submodules(): - pr_labels.append(SUBMODULE_CHANGED_LABEL) + pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL) elif SUBMODULE_CHANGED_LABEL in pr_info.labels: - remove_labels(gh, pr_info, [SUBMODULE_CHANGED_LABEL]) + pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL) + + if pr_labels_to_add: + post_labels(gh, pr_info, pr_labels_to_add) + + if pr_labels_to_remove: + remove_labels(gh, pr_info, pr_labels_to_remove) - if pr_labels: - post_labels(gh, pr_info, pr_labels) if description_report: print("::notice ::Cannot run, description does not match the template") logging.info( From bcf64a73d1c272dca22a1ca910df3153a6a66784 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Mar 2022 11:00:45 +0200 Subject: [PATCH 11/12] Followup --- tests/ci/run_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 5f3792f3432..8f28287e88b 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -243,7 +243,7 @@ if __name__ == "__main__": pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category]) for label in pr_info.labels: - if label in MAP_CATEGORY_TO_LABEL and label not in pr_labels_to_add: + if label in MAP_CATEGORY_TO_LABEL.values() and label not in pr_labels_to_add: pr_labels_to_remove.append(label) if pr_info.has_changes_in_submodules(): From c510ede8dd5b9e739be4de6b6c57aa8bbddfa22b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Mar 2022 11:19:11 +0200 Subject: [PATCH 12/12] Fixup --- tests/ci/run_check.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 8f28287e88b..93dc77124c2 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -243,7 +243,11 @@ if __name__ == "__main__": pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category]) for label in pr_info.labels: - if label in MAP_CATEGORY_TO_LABEL.values() and label not in pr_labels_to_add: + if ( + label in MAP_CATEGORY_TO_LABEL.values() + and category in MAP_CATEGORY_TO_LABEL + and label != MAP_CATEGORY_TO_LABEL[category] + ): pr_labels_to_remove.append(label) if pr_info.has_changes_in_submodules():