Merge branch 'master' into is-secure-client-info

2024-11-28 18:42:26 +00:00 · 2022-03-30 11:26:05 +00:00 · 2022-03-30 11:26:05 +00:00 · 7bc5731025
commit 7bc5731025
parent 87bab4f2ea 31c367d3cd
17 changed files with 290 additions and 1401 deletions
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -149,7 +149,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH"
  SplitBuildSmokeTest:
    needs: [BuilderDebSplitted]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Set envs
@ -316,7 +315,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinRelease:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -362,7 +360,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinGCC:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -636,7 +633,6 @@ jobs:
 ##########################################################################################
  BuilderDebSplitted:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -682,7 +678,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinTidy:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -728,7 +723,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinDarwin:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -774,7 +768,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinAarch64:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -820,7 +813,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinFreeBSD:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -866,7 +858,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinDarwinAarch64:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -912,7 +903,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinPPC64:
    needs: [DockerHubPush]
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
--- a/src/Common/format.h
+++ b/src/Common/format.h
@ -0,0 +1,178 @@
 #pragma once
 #include <base/types.h>
 #include <Common/Exception.h>
 #include <Common/PODArray.h>
 #include <Common/StringUtils/StringUtils.h>
 namespace DB
 {
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
 }
 namespace Format
 {
    using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
    static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
    {
        res = 0;
        for (UInt64 pos = l; pos < r; ++pos)
        {
            if (!isNumericASCII(description[pos]))
                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
            res = res * 10 + description[pos] - '0';
            if (res >= argument_number)
                throw Exception(
                    "Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS);
        }
    }
    static inline void init(
        const String & pattern,
        size_t argument_number,
        const std::vector<std::optional<String>> & constant_strings,
        IndexPositions & index_positions,
        std::vector<String> & substrings)
    {
        /// Is current position after open curly brace.
        bool is_open_curly = false;
        /// The position of last open token.
        size_t last_open = -1;
        /// Is formatting in a plain {} token.
        std::optional<bool> is_plain_numbering;
        UInt64 index_if_plain = 0;
        /// Left position of adding substrings, just to the closed brace position or the start of the string.
        /// Invariant --- the start of substring is in this position.
        size_t start_pos = 0;
        /// A flag to decide whether we should glue the constant strings.
        bool glue_to_next = false;
        /// Handling double braces (escaping).
        auto double_brace_removal = [](String & str)
        {
            size_t i = 0;
            bool should_delete = true;
            str.erase(
                std::remove_if(
                    str.begin(),
                    str.end(),
                    [&i, &should_delete, &str](char)
                    {
                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
                        ++i;
                        if (is_double_brace && should_delete)
                        {
                            should_delete = false;
                            return true;
                        }
                        should_delete = true;
                        return false;
                    }),
                str.end());
        };
        index_positions.emplace_back();
        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] == '{')
            {
                /// Escaping handling
                /// It is safe to access because of null termination
                if (pattern[i + 1] == '{')
                {
                    ++i;
                    continue;
                }
                if (is_open_curly)
                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
                String to_add = String(pattern.data() + start_pos, i - start_pos);
                double_brace_removal(to_add);
                if (!glue_to_next)
                    substrings.emplace_back(to_add);
                else
                    substrings.back() += to_add;
                glue_to_next = false;
                is_open_curly = true;
                last_open = i + 1;
            }
            else if (pattern[i] == '}')
            {
                if (pattern[i + 1] == '}')
                {
                    ++i;
                    continue;
                }
                if (!is_open_curly)
                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
                is_open_curly = false;
                if (last_open == i)
                {
                    if (is_plain_numbering && !*is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = true;
                    if (index_if_plain >= argument_number)
                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
                    index_positions.back() = index_if_plain++;
                }
                else
                {
                    if (is_plain_numbering && *is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = false;
                    UInt64 arg;
                    parseNumber(pattern, last_open, i, arg, argument_number);
                    if (arg >= argument_number)
                        throw Exception(
                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
                    index_positions.back() = arg;
                }
                if (!constant_strings.empty() && constant_strings[index_positions.back()])
                {
                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
                    glue_to_next = true;
                    substrings.back() += *constant_strings[index_positions.back()];
                }
                else
                    index_positions.emplace_back(); /// Otherwise we commit arg number and proceed.
                start_pos = i + 1;
            }
        }
        if (is_open_curly)
            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
        double_brace_removal(to_add);
        if (!glue_to_next)
            substrings.emplace_back(to_add);
        else
            substrings.back() += to_add;
        index_positions.pop_back();
    }
 }
 }
--- a/src/Functions/concat.cpp
+++ b/src/Functions/concat.cpp
@ -52,23 +52,21 @@ public:
    {
        if (arguments.size() < 2)
            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                    + ", should be at least 2.",
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                getName(),
-
+                arguments.size());
        if (arguments.size() > FormatImpl::argument_threshold)
            throw Exception(
                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        for (const auto arg_idx : collections::range(0, arguments.size()))
        {
            const auto * arg = arguments[arg_idx].get();
            if (!isStringOrFixedString(arg))
-                throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function "
+                throw Exception(
-                                    + getName(),
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+                    "Illegal type {} of argument {} of function {}",
                    arg->getName(),
                    arg_idx + 1,
                    getName());
        }
        return std::make_shared<DataTypeString>();
@ -125,7 +123,7 @@ private:
        std::vector<const ColumnString::Chars *> data(num_arguments);
        std::vector<const ColumnString::Offsets *> offsets(num_arguments);
        std::vector<size_t> fixed_string_sizes(num_arguments);
-        std::vector<String> constant_strings(num_arguments);
+        std::vector<std::optional<String>> constant_strings(num_arguments);
        bool has_column_string = false;
        bool has_column_fixed_string = false;
        for (size_t i = 0; i < num_arguments; ++i)
--- a/src/Functions/formatString.cpp
+++ b/src/Functions/formatString.cpp
@ -45,25 +45,23 @@ public:
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
-        if (arguments.empty())
+        if (arguments.size() < 2)
            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                    + ", should be at least 1",
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                getName(),
-
+                arguments.size());
        if (arguments.size() > FormatImpl::argument_threshold)
            throw Exception(
                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        for (const auto arg_idx : collections::range(0, arguments.size()))
        {
            const auto * arg = arguments[arg_idx].get();
            if (!isStringOrFixedString(arg))
                throw Exception(
-                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(),
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                    "Illegal type {} of argument {} of function {}",
                    arg->getName(),
                    arg_idx + 1,
                    getName());
        }
        return std::make_shared<DataTypeString>();
@ -84,7 +82,7 @@ public:
        std::vector<const ColumnString::Chars *> data(arguments.size() - 1);
        std::vector<const ColumnString::Offsets *> offsets(arguments.size() - 1);
        std::vector<size_t> fixed_string_sizes(arguments.size() - 1);
-        std::vector<String> constant_strings(arguments.size() - 1);
+        std::vector<std::optional<String>> constant_strings(arguments.size() - 1);
        bool has_column_string = false;
        bool has_column_fixed_string = false;
--- a/src/Functions/formatString.h
+++ b/src/Functions/formatString.h
@ -4,8 +4,10 @@
 #include <base/types.h>
 #include <Common/Exception.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/format.h>
 #include <Common/memcpySmall.h>
 #include <algorithm>
 #include <optional>
 #include <string>
@ -15,15 +17,9 @@
 namespace DB
 {
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
 }
 struct FormatImpl
 {
    static constexpr size_t small_argument_threshold = 1024;
    static constexpr size_t argument_threshold = std::numeric_limits<UInt32>::max();
    static constexpr size_t right_padding = 15;
    template <typename... Args>
@ -39,165 +35,10 @@ struct FormatImpl
            format<false, false>(std::forward<Args>(args)...);
    }
    static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res)
    {
        res = 0;
        for (UInt64 pos = l; pos < r; ++pos)
        {
            if (!isNumericASCII(description[pos]))
                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
            res = res * 10 + description[pos] - '0';
            if (res >= argument_threshold)
                throw Exception(
                    "Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
        }
    }
    static inline void init(
        const String & pattern,
        const std::vector<const ColumnString::Chars *> & data,
        size_t argument_number,
        const std::vector<String> & constant_strings,
        UInt64 * index_positions_ptr,
        std::vector<String> & substrings)
    {
        /// Is current position after open curly brace.
        bool is_open_curly = false;
        /// The position of last open token.
        size_t last_open = -1;
        /// Is formatting in a plain {} token.
        std::optional<bool> is_plain_numbering;
        UInt64 index_if_plain = 0;
        /// Left position of adding substrings, just to the closed brace position or the start of the string.
        /// Invariant --- the start of substring is in this position.
        size_t start_pos = 0;
        /// A flag to decide whether we should glue the constant strings.
        bool glue_to_next = false;
        /// Handling double braces (escaping).
        auto double_brace_removal = [](String & str)
        {
            size_t i = 0;
            bool should_delete = true;
            str.erase(
                std::remove_if(
                    str.begin(),
                    str.end(),
                    [&i, &should_delete, &str](char)
                    {
                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
                        ++i;
                        if (is_double_brace && should_delete)
                        {
                            should_delete = false;
                            return true;
                        }
                        should_delete = true;
                        return false;
                    }),
                str.end());
        };
        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] == '{')
            {
                /// Escaping handling
                /// It is safe to access because of null termination
                if (pattern[i + 1] == '{')
                {
                    ++i;
                    continue;
                }
                if (is_open_curly)
                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
                String to_add = String(pattern.data() + start_pos, i - start_pos);
                double_brace_removal(to_add);
                if (!glue_to_next)
                    substrings.emplace_back(to_add);
                else
                    substrings.back() += to_add;
                glue_to_next = false;
                is_open_curly = true;
                last_open = i + 1;
            }
            else if (pattern[i] == '}')
            {
                if (pattern[i + 1] == '}')
                {
                    ++i;
                    continue;
                }
                if (!is_open_curly)
                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
                is_open_curly = false;
                if (last_open == i)
                {
                    if (is_plain_numbering && !*is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = true;
                    if (index_if_plain >= argument_number)
                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
                    *index_positions_ptr = index_if_plain++;
                }
                else
                {
                    if (is_plain_numbering && *is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = false;
                    UInt64 arg;
                    parseNumber(pattern, last_open, i, arg);
                    if (arg >= argument_number)
                        throw Exception(
                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
                    *index_positions_ptr = arg;
                }
                /// Constant string.
                if (!data[*index_positions_ptr])
                {
                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
                    glue_to_next = true;
                    substrings.back() += constant_strings[*index_positions_ptr];
                }
                else
                    ++index_positions_ptr; /// Otherwise we commit arg number and proceed.
                start_pos = i + 1;
            }
        }
        if (is_open_curly)
            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
        double_brace_removal(to_add);
        if (!glue_to_next)
            substrings.emplace_back(to_add);
        else
            substrings.back() += to_add;
    }
    /// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string.
    /// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString.
    /// fixed_string_N for savings N to fixed strings.
-    /// constant_strings for constant strings. If data[i] is nullptr, than it is constant string.
+    /// constant_strings for constant strings. If data[i] is nullptr, it is constant string.
    /// res_data is result_data, res_offsets is offset result.
    /// input_rows_count is the number of rows processed.
    /// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
@ -207,29 +48,22 @@ struct FormatImpl
        const std::vector<const ColumnString::Chars *> & data,
        const std::vector<const ColumnString::Offsets *> & offsets,
        [[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector<size_t> & fixed_string_N,
-        const std::vector<String> & constant_strings,
+        const std::vector<std::optional<String>> & constant_strings,
        ColumnString::Chars & res_data,
        ColumnString::Offsets & res_offsets,
        size_t input_rows_count)
    {
        const size_t argument_number = offsets.size();
-        UInt64 small_index_positions_buffer[small_argument_threshold];
+        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this
-        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices.
+        /// array will be filled with [1, 3, 1, 0] but without constant string indices.
-        UInt64 * index_positions = small_index_positions_buffer;
+        Format::IndexPositions index_positions;
        std::unique_ptr<UInt64[]> big_index_positions_buffer;
        if (argument_number > small_argument_threshold)
        {
            big_index_positions_buffer.reset(new UInt64[argument_number]);
            index_positions = big_index_positions_buffer.get();
        }
        /// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation.
        /// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``].
        std::vector<String> substrings;
-        init(pattern, data, argument_number, constant_strings, index_positions, substrings);
+        Format::init(pattern, argument_number, constant_strings, index_positions, substrings);
        UInt64 final_size = 0;
@ -271,7 +105,7 @@ struct FormatImpl
                for (size_t j = 1; j < substrings.size(); ++j)
                {
                    UInt64 arg = index_positions[j - 1];
-                    auto offset_ptr = offsets[arg];
+                    const auto * offset_ptr = offsets[arg];
                    UInt64 arg_offset = 0;
                    UInt64 size = 0;
--- a/tests/ci/cherry_pick_utils/parser.py
+++ b/tests/ci/cherry_pick_utils/parser.py
@ -20,8 +20,6 @@ class Description:
    def __init__(self, pull_request):
        self.label_name = str()
        self.legal = False
        self._parse(pull_request["bodyText"])
    def _parse(self, text):
@ -39,12 +37,6 @@ class Description:
                category = stripped
                next_category = False
            if (
                stripped
                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
            ):
                self.legal = True
            category_headers = (
                "Category (leave one):",
                "Changelog category (leave one):",
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@ -59,3 +59,17 @@ def post_commit_status_to_file(file_path, description, state, report_url):
    with open(file_path, "w", encoding="utf-8") as f:
        out = csv.writer(f, delimiter="\t")
        out.writerow([state, report_url, description])
 def remove_labels(gh, pr_info, labels_names):
    repo = gh.get_repo(GITHUB_REPOSITORY)
    pull_request = repo.get_pull(pr_info.number)
    for label in labels_names:
        pull_request.remove_from_labels(label)
 def post_labels(gh, pr_info, labels_names):
    repo = gh.get_repo(GITHUB_REPOSITORY)
    pull_request = repo.get_pull(pr_info.number)
    for label in labels_names:
        pull_request.add_to_labels(label)
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@ -236,6 +236,15 @@ class PRInfo:
                return True
        return False
    def has_changes_in_submodules(self):
        if self.changed_files is None or not self.changed_files:
            return True
        for f in self.changed_files:
            if "contrib" in f:
                return True
        return False
    def can_skip_builds_and_use_version_from_master(self):
        # TODO: See a broken loop
        if "force tests" in self.labels:
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@ -8,7 +8,7 @@ from github import Github
 from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
-from commit_status_helper import get_commit
+from commit_status_helper import get_commit, post_labels, remove_labels
 NAME = "Run Check (actions)"
@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
 CAN_BE_TESTED_LABEL = "can be tested"
 DO_NOT_TEST_LABEL = "do not test"
 FORCE_TESTS_LABEL = "force tests"
 SUBMODULE_CHANGED_LABEL = "submodule changed"
 # Individual trusted contirbutors who are not in any trusted organization.
 # Can be changed in runtime: we will append users that we learned to be in
@ -81,6 +82,25 @@ TRUSTED_CONTRIBUTORS = {
    ]
 }
 MAP_CATEGORY_TO_LABEL = {
    "New Feature": "pr-feature",
    "Bug Fix": "pr-bugfix",
    "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix",
    "Improvement": "pr-improvement",
    "Performance Improvement": "pr-performance",
    "Backward Incompatible Change": "pr-backward-incompatible",
    "Build/Testing/Packaging Improvement": "pr-build",
    "Build Improvement": "pr-build",
    "Build/Testing Improvement": "pr-build",
    "Build": "pr-build",
    "Packaging Improvement": "pr-build",
    "Not for changelog (changelog entry is not required)": "pr-not-for-changelog",
    "Not for changelog": "pr-not-for-changelog",
    "Documentation (changelog entry is not required)": "pr-documentation",
    "Documentation": "pr-documentation",
    # 'Other': doesn't match anything
 }
 def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
    if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
@ -168,7 +188,7 @@ def check_pr_description(pr_info):
                    + second_category
                    + "'"
                )
-                return result_status[:140]
+                return result_status[:140], category
        elif re.match(
            r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@ -190,30 +210,57 @@ def check_pr_description(pr_info):
            i += 1
    if not category:
-        return "Changelog category is empty"
+        return "Changelog category is empty", category
    # Filter out the PR categories that are not for changelog.
    if re.match(
        r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    ):
-        return ""
+        return "", category
    if not entry:
-        return f"Changelog entry required for category '{category}'"
+        return f"Changelog entry required for category '{category}'", category
-    return ""
+    return "", category
 if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
-    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
+    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
    can_run, description, labels_state = should_run_checks_for_pr(pr_info)
    gh = Github(get_best_robot_token())
    commit = get_commit(gh, pr_info.sha)
-    description_report = check_pr_description(pr_info)[:139]
+    description_report, category = check_pr_description(pr_info)
    pr_labels_to_add = []
    pr_labels_to_remove = []
    if (
        category in MAP_CATEGORY_TO_LABEL
        and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels
    ):
        pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category])
    for label in pr_info.labels:
        if (
            label in MAP_CATEGORY_TO_LABEL.values()
            and category in MAP_CATEGORY_TO_LABEL
            and label != MAP_CATEGORY_TO_LABEL[category]
        ):
            pr_labels_to_remove.append(label)
    if pr_info.has_changes_in_submodules():
        pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL)
    elif SUBMODULE_CHANGED_LABEL in pr_info.labels:
        pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL)
    if pr_labels_to_add:
        post_labels(gh, pr_info, pr_labels_to_add)
    if pr_labels_to_remove:
        remove_labels(gh, pr_info, pr_labels_to_remove)
    if description_report:
        print("::notice ::Cannot run, description does not match the template")
        logging.info(
@ -225,7 +272,7 @@ if __name__ == "__main__":
        )
        commit.create_status(
            context=NAME,
-            description=description_report,
+            description=description_report[:139],
            state="failure",
            target_url=url,
        )
--- a/tests/queries/0_stateless/02245_format_string_stack_overflow.reference
+++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.reference
@ -0,0 +1 @@
 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
--- a/tests/queries/0_stateless/02245_format_string_stack_overflow.sql
+++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.sql
--- a/utils/github/init.py
+++ b/utils/github/init.py
@ -1 +0,0 @@
 # -*- coding: utf-8 -*-
--- a/utils/github/backport.py
+++ b/utils/github/backport.py
@ -1,185 +0,0 @@
 # -*- coding: utf-8 -*-
 try:
    from clickhouse.utils.github.cherrypick import CherryPick
    from clickhouse.utils.github.query import Query as RemoteRepo
    from clickhouse.utils.github.local import Repository as LocalRepo
 except:
    from .cherrypick import CherryPick
    from .query import Query as RemoteRepo
    from .local import Repository as LocalRepo
 import argparse
 import logging
 import re
 import sys
 class Backport:
    def __init__(self, token, owner, name, team):
        self._gh = RemoteRepo(
            token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
        )
        self._token = token
        self.default_branch_name = self._gh.default_branch
        self.ssh_url = self._gh.ssh_url
    def getPullRequests(self, from_commit):
        return self._gh.get_pull_requests(from_commit)
    def getBranchesWithRelease(self):
        branches = set()
        for pull_request in self._gh.find_pull_requests("release"):
            branches.add(pull_request["headRefName"])
        return branches
    def execute(self, repo, upstream, until_commit, run_cherrypick):
        repo = LocalRepo(repo, upstream, self.default_branch_name)
        all_branches = repo.get_release_branches()  # [(branch_name, base_commit)]
        release_branches = self.getBranchesWithRelease()
        branches = []
        # iterate over all branches to preserve their precedence.
        for branch in all_branches:
            if branch[0] in release_branches:
                branches.append(branch)
        if not branches:
            logging.info("No release branches found!")
            return
        for branch in branches:
            logging.info("Found release branch: %s", branch[0])
        if not until_commit:
            until_commit = branches[0][1]
        pull_requests = self.getPullRequests(until_commit)
        backport_map = {}
        RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
        RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
        RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
        # pull-requests are sorted by ancestry from the most recent.
        for pr in pull_requests:
            while repo.comparator(branches[-1][1]) >= repo.comparator(
                pr["mergeCommit"]["oid"]
            ):
                logging.info(
                    "PR #{} is already inside {}. Dropping this branch for further PRs".format(
                        pr["number"], branches[-1][0]
                    )
                )
                branches.pop()
            logging.info("Processing PR #{}".format(pr["number"]))
            assert len(branches)
            branch_set = set([branch[0] for branch in branches])
            # First pass. Find all must-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-must-backport":
                    backport_map[pr["number"]] = branch_set.copy()
                    continue
                matched = RE_MUST_BACKPORT.match(label["name"])
                if matched:
                    if pr["number"] not in backport_map:
                        backport_map[pr["number"]] = set()
                    backport_map[pr["number"]].add(matched.group(1))
            # Second pass. Find all no-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
                    del backport_map[pr["number"]]
                    break
                matched_no_backport = RE_NO_BACKPORT.match(label["name"])
                matched_backported = RE_BACKPORTED.match(label["name"])
                if (
                    matched_no_backport
                    and pr["number"] in backport_map
                    and matched_no_backport.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_no_backport.group(1))
                    logging.info(
                        "\tskipping %s because of forced no-backport",
                        matched_no_backport.group(1),
                    )
                elif (
                    matched_backported
                    and pr["number"] in backport_map
                    and matched_backported.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_backported.group(1))
                    logging.info(
                        "\tskipping %s because it's already backported manually",
                        matched_backported.group(1),
                    )
        for pr, branches in list(backport_map.items()):
            logging.info("PR #%s needs to be backported to:", pr)
            for branch in branches:
                logging.info(
                    "\t%s, and the status is: %s",
                    branch,
                    run_cherrypick(self._token, pr, branch),
                )
        # print API costs
        logging.info("\nGitHub API total costs per query:")
        for name, value in list(self._gh.api_costs.items()):
            logging.info("%s : %s", name, value)
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="token for Github access"
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    parser.add_argument(
        "--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="do not create or merge any PRs",
        default=False,
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="more verbose output",
        default=False,
    )
    parser.add_argument(
        "--upstream",
        "-u",
        type=str,
        help="remote name of upstream in repository",
        default="origin",
    )
    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(
            format="%(message)s", stream=sys.stdout, level=logging.DEBUG
        )
    else:
        logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
    cherrypick_run = lambda token, pr, branch: CherryPick(
        token, "ClickHouse", "ClickHouse", "core", pr, branch
    ).execute(args.repo, args.dry_run)
    bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
    bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
--- a/utils/github/cherrypick.py
+++ b/utils/github/cherrypick.py
@ -1,323 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Backports changes from PR to release branch.
 Requires multiple separate runs as part of the implementation.
 First run should do the following:
 1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
 2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
 3. Create PR from temporary branch to backport branch (emulating cherry-pick).
 Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
 Third run creates PR from backport branch (with merged previous PR) to release branch.
 """
 try:
    from clickhouse.utils.github.query import Query as RemoteRepo
 except:
    from .query import Query as RemoteRepo
 import argparse
 from enum import Enum
 import logging
 import os
 import subprocess
 import sys
 class CherryPick:
    class Status(Enum):
        DISCARDED = "discarded"
        NOT_INITIATED = "not started"
        FIRST_MERGEABLE = "waiting for 1st stage"
        FIRST_CONFLICTS = "conflicts on 1st stage"
        SECOND_MERGEABLE = "waiting for 2nd stage"
        SECOND_CONFLICTS = "conflicts on 2nd stage"
        MERGED = "backported"
    def _run(self, args):
        out = subprocess.check_output(args).rstrip()
        logging.debug(out)
        return out
    def __init__(self, token, owner, name, team, pr_number, target_branch):
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
        self._pr = self._gh.get_pull_request(pr_number)
        self.ssh_url = self._gh.ssh_url
        # TODO: check if pull-request is merged.
        self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
        self.target_branch = target_branch
        self.backport_branch = "backport/{branch}/{pr}".format(
            branch=target_branch, pr=pr_number
        )
        self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
            branch=target_branch, oid=self.merge_commit_oid
        )
    def getCherryPickPullRequest(self):
        return self._gh.find_pull_request(
            base=self.backport_branch, head=self.cherrypick_branch
        )
    def createCherryPickPullRequest(self, repo_path):
        DESCRIPTION = (
            "This pull-request is a first step of an automated backporting.\n"
            "It contains changes like after calling a local command `git cherry-pick`.\n"
            "If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
            "Otherwise, if you do not want to backport them, then just close this pull-request.\n"
            "\n"
            "The check results does not matter at this step - you can safely ignore them.\n"
            "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
        )
        # FIXME: replace with something better than os.system()
        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@yandex-team.ru",
            "-c",
            "user.name=robot-clickhouse",
        ]
        base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
        # Create separate branch for backporting, and make it look like real cherry-pick.
        self._run(git_prefix + ["checkout", "-f", self.target_branch])
        self._run(git_prefix + ["checkout", "-B", self.backport_branch])
        self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
        # Create secondary branch to allow pull request with cherry-picked commit.
        self._run(
            git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
        )
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.cherrypick_branch),
            ]
        )
        # Create pull-request like a local cherry-pick
        pr = self._gh.create_pull_request(
            source=self.cherrypick_branch,
            target=self.backport_branch,
            title="Cherry pick #{number} to {target}: {title}".format(
                number=self._pr["number"],
                target=self.target_branch,
                title=self._pr["title"].replace('"', '\\"'),
            ),
            description="Original pull-request #{}\n\n{}".format(
                self._pr["number"], DESCRIPTION
            ),
        )
        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])
        self._gh.set_label(pr, "do not test")
        self._gh.set_label(pr, "pr-cherrypick")
        return pr
    def mergeCherryPickPullRequest(self, cherrypick_pr):
        return self._gh.merge_pull_request(cherrypick_pr["id"])
    def getBackportPullRequest(self):
        return self._gh.find_pull_request(
            base=self.target_branch, head=self.backport_branch
        )
    def createBackportPullRequest(self, cherrypick_pr, repo_path):
        DESCRIPTION = (
            "This pull-request is a last step of an automated backporting.\n"
            "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
            "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
        )
        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@clickhouse.com",
            "-c",
            "user.name=robot-clickhouse",
        ]
        pr_title = "Backport #{number} to {target}: {title}".format(
            number=self._pr["number"],
            target=self.target_branch,
            title=self._pr["title"].replace('"', '\\"'),
        )
        self._run(git_prefix + ["checkout", "-f", self.backport_branch])
        self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
        self._run(
            git_prefix
            + [
                "reset",
                "--soft",
                self._run(
                    git_prefix
                    + [
                        "merge-base",
                        "origin/" + self.target_branch,
                        self.backport_branch,
                    ]
                ),
            ]
        )
        self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )
        pr = self._gh.create_pull_request(
            source=self.backport_branch,
            target=self.target_branch,
            title=pr_title,
            description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
                self._pr["number"], cherrypick_pr["number"], DESCRIPTION
            ),
        )
        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])
        self._gh.set_label(pr, "pr-backport")
        return pr
    def execute(self, repo_path, dry_run=False):
        pr1 = self.getCherryPickPullRequest()
        if not pr1:
            if not dry_run:
                pr1 = self.createCherryPickPullRequest(repo_path)
                logging.debug(
                    "Created PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )
            else:
                return CherryPick.Status.NOT_INITIATED
        else:
            logging.debug(
                "Found PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )
        if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
            if not dry_run:
                pr1 = self.mergeCherryPickPullRequest(pr1)
                logging.debug(
                    "Merged PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )
        if not pr1["merged"]:
            logging.debug(
                "Waiting for PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )
            if pr1["closed"]:
                return CherryPick.Status.DISCARDED
            elif pr1["mergeable"] == "CONFLICTING":
                return CherryPick.Status.FIRST_CONFLICTS
            else:
                return CherryPick.Status.FIRST_MERGEABLE
        pr2 = self.getBackportPullRequest()
        if not pr2:
            if not dry_run:
                pr2 = self.createBackportPullRequest(pr1, repo_path)
                logging.debug(
                    "Created PR with backport of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr2["url"],
                )
            else:
                return CherryPick.Status.FIRST_MERGEABLE
        else:
            logging.debug(
                "Found PR with backport of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr2["url"],
            )
        if pr2["merged"]:
            return CherryPick.Status.MERGED
        elif pr2["closed"]:
            return CherryPick.Status.DISCARDED
        elif pr2["mergeable"] == "CONFLICTING":
            return CherryPick.Status.SECOND_CONFLICTS
        else:
            return CherryPick.Status.SECOND_MERGEABLE
 if __name__ == "__main__":
    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", "-t", type=str, required=True, help="token for Github access"
    )
    parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
    parser.add_argument(
        "--branch",
        "-b",
        type=str,
        required=True,
        help="target branch name for cherry-pick",
    )
    parser.add_argument(
        "--repo",
        "-r",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    args = parser.parse_args()
    cp = CherryPick(
        args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
    )
    cp.execute(args.repo)
--- a/utils/github/local.py
+++ b/utils/github/local.py
@ -1,108 +0,0 @@
 # -*- coding: utf-8 -*-
 import functools
 import logging
 import os
 import re
 class RepositoryBase:
    def __init__(self, repo_path):
        import git
        self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
        # comparator of commits
        def cmp(x, y):
            if str(x) == str(y):
                return 0
            if self._repo.is_ancestor(x, y):
                return -1
            else:
                return 1
        self.comparator = functools.cmp_to_key(cmp)
    def get_head_commit(self):
        return self._repo.commit(self._default)
    def iterate(self, begin, end):
        rev_range = "{}...{}".format(begin, end)
        for commit in self._repo.iter_commits(rev_range, first_parent=True):
            yield commit
 class Repository(RepositoryBase):
    def __init__(self, repo_path, remote_name, default_branch_name):
        super(Repository, self).__init__(repo_path)
        self._remote = self._repo.remotes[remote_name]
        self._remote.fetch()
        self._default = self._remote.refs[default_branch_name]
    def get_release_branches(self):
        """
        Returns sorted list of tuples:
         * remote branch (git.refs.remote.RemoteReference),
         * base commit (git.Commit),
         * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []
        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
        for branch in [
            r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))
        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
 class BareRepository(RepositoryBase):
    def __init__(self, repo_path, default_branch_name):
        super(BareRepository, self).__init__(repo_path)
        self._default = self._repo.branches[default_branch_name]
    def get_release_branches(self):
        """
        Returns sorted list of tuples:
         * branch (git.refs.head?),
         * base commit (git.Commit),
         * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []
        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
        for branch in [
            r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))
        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
--- a/utils/github/parser.py
+++ b/utils/github/parser.py
@ -1,64 +0,0 @@
 # -*- coding: utf-8 -*-
 class Description:
    """Parsed description representation"""
    MAP_CATEGORY_TO_LABEL = {
        "New Feature": "pr-feature",
        "Bug Fix": "pr-bugfix",
        "Improvement": "pr-improvement",
        "Performance Improvement": "pr-performance",
        # 'Backward Incompatible Change': doesn't match anything
        "Build/Testing/Packaging Improvement": "pr-build",
        "Non-significant (changelog entry is not needed)": "pr-non-significant",
        "Non-significant (changelog entry is not required)": "pr-non-significant",
        "Non-significant": "pr-non-significant",
        "Documentation (changelog entry is not required)": "pr-documentation",
        # 'Other': doesn't match anything
    }
    def __init__(self, pull_request):
        self.label_name = str()
        self.legal = False
        self._parse(pull_request["bodyText"])
    def _parse(self, text):
        lines = text.splitlines()
        next_category = False
        category = str()
        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue
            if next_category:
                category = stripped
                next_category = False
            if (
                stripped
                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
            ):
                self.legal = True
            category_headers = (
                "Category (leave one):",
                "Changelog category (leave one):",
                "Changelog category:",
                "Category:",
            )
            if stripped in category_headers:
                next_category = True
        if category in Description.MAP_CATEGORY_TO_LABEL:
            self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
        else:
            if not category:
                print("Cannot find category in pr description")
            else:
                print(("Unknown category: " + category))
--- a/utils/github/query.py
+++ b/utils/github/query.py
@ -1,492 +0,0 @@
 # -*- coding: utf-8 -*-
 import requests
 class Query:
    """
    Implements queries to the Github API using GraphQL
    """
    _PULL_REQUEST = """
        author {{
            ... on User {{
                id
                login
            }}
        }}
        baseRepository {{
            nameWithOwner
        }}
        mergeCommit {{
            oid
            parents(first: {min_page_size}) {{
                totalCount
                nodes {{
                    oid
                }}
            }}
        }}
        mergedBy {{
            ... on User {{
                id
                login
            }}
        }}
        baseRefName
        closed
        headRefName
        id
        mergeable
        merged
        number
        title
        url
    """
    def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
        self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
        self._token = token
        self._owner = owner
        self._name = name
        self._team = team
        self._max_page_size = max_page_size
        self._min_page_size = min_page_size
        self.api_costs = {}
        repo = self.get_repository()
        self._id = repo["id"]
        self.ssh_url = repo["sshUrl"]
        self.default_branch = repo["defaultBranchRef"]["name"]
        self.members = set(self.get_members())
    def get_repository(self):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    name
                }}
                id
                sshUrl
            }}
        """
        query = _QUERY.format(owner=self._owner, name=self._name)
        return self._run(query)["repository"]
    def get_members(self):
        """Get all team members for organization
        Returns:
            members: a map of members' logins to ids
        """
        _QUERY = """
            organization(login: "{organization}") {{
                team(slug: "{team}") {{
                    members(first: {max_page_size} {next}) {{
                        pageInfo {{
                            hasNextPage
                            endCursor
                        }}
                        nodes {{
                            id
                            login
                        }}
                    }}
                }}
            }}
        """
        members = {}
        not_end = True
        query = _QUERY.format(
            organization=self._owner,
            team=self._team,
            max_page_size=self._max_page_size,
            next="",
        )
        while not_end:
            result = self._run(query)["organization"]["team"]
            if result is None:
                break
            result = result["members"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                organization=self._owner,
                team=self._team,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )
            members += dict([(node["login"], node["id"]) for node in result["nodes"]])
        return members
    def get_pull_request(self, number):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequest(number: {number}) {{
                    {pull_request_data}
                }}
            }}
        """
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            number=number,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequest"]
    def find_pull_request(self, base, head):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
                    nodes {{
                        {pull_request_data}
                    }}
                    totalCount
                }}
            }}
        """
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            base=base,
            head=head,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        result = self._run(query)["repository"]["pullRequests"]
        if result["totalCount"] > 0:
            return result["nodes"][0]
        else:
            return {}
    def find_pull_requests(self, label_name):
        """
        Get all pull-requests filtered by label name
        """
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
                    nodes {{
                        {pull_request_data}
                    }}
                }}
            }}
        """
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequests"]["nodes"]
    def get_pull_requests(self, before_commit):
        """
        Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
        """
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    target {{
                        ... on Commit {{
                            history(first: {max_page_size} {next}) {{
                                pageInfo {{
                                    hasNextPage
                                    endCursor
                                }}
                                nodes {{
                                    oid
                                    associatedPullRequests(first: {min_page_size}) {{
                                        totalCount
                                        nodes {{
                                            ... on PullRequest {{
                                                {pull_request_data}
                                                labels(first: {min_page_size}) {{
                                                    totalCount
                                                    pageInfo {{
                                                        hasNextPage
                                                        endCursor
                                                    }}
                                                    nodes {{
                                                        name
                                                        color
                                                    }}
                                                }}
                                            }}
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        """
        pull_requests = []
        not_end = True
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            max_page_size=self._max_page_size,
            min_page_size=self._min_page_size,
            pull_request_data=self._PULL_REQUEST,
            next="",
        )
        while not_end:
            result = self._run(query)["repository"]["defaultBranchRef"]["target"][
                "history"
            ]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                owner=self._owner,
                name=self._name,
                max_page_size=self._max_page_size,
                min_page_size=self._min_page_size,
                pull_request_data=self._PULL_REQUEST,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )
            for commit in result["nodes"]:
                # FIXME: maybe include `before_commit`?
                if str(commit["oid"]) == str(before_commit):
                    not_end = False
                    break
                # TODO: fetch all pull-requests that were merged in a single commit.
                assert (
                    commit["associatedPullRequests"]["totalCount"]
                    <= self._min_page_size
                )
                for pull_request in commit["associatedPullRequests"]["nodes"]:
                    if (
                        pull_request["baseRepository"]["nameWithOwner"]
                        == "{}/{}".format(self._owner, self._name)
                        and pull_request["baseRefName"] == self.default_branch
                        and pull_request["mergeCommit"]["oid"] == commit["oid"]
                    ):
                        pull_requests.append(pull_request)
        return pull_requests
    def create_pull_request(
        self, source, target, title, description="", draft=False, can_modify=True
    ):
        _QUERY = """
            createPullRequest(input: {{
                baseRefName: "{target}",
                headRefName: "{source}",
                repositoryId: "{id}",
                title: "{title}",
                body: "{body}",
                draft: {draft},
                maintainerCanModify: {modify}
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """
        query = _QUERY.format(
            target=target,
            source=source,
            id=self._id,
            title=title,
            body=description,
            draft="true" if draft else "false",
            modify="true" if can_modify else "false",
            pull_request_data=self._PULL_REQUEST,
        )
        return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
    def merge_pull_request(self, id):
        _QUERY = """
            mergePullRequest(input: {{
                pullRequestId: "{id}"
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """
        query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
        return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
    # FIXME: figure out how to add more assignees at once
    def add_assignee(self, pr, assignee):
        _QUERY = """
            addAssigneesToAssignable(input: {{
                assignableId: "{id1}",
                assigneeIds: "{id2}"
            }}) {{
                clientMutationId
            }}
        """
        query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
        self._run(query, is_mutation=True)
    def set_label(self, pull_request, label_name):
        """
        Set label by name to the pull request
        Args:
            pull_request: JSON object returned by `get_pull_requests()`
            label_name (string): label name
        """
        _GET_LABEL = """
            repository(owner: "{owner}" name: "{name}") {{
                labels(first: {max_page_size} {next} query: "{label_name}") {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        id
                        name
                        color
                    }}
                }}
            }}
        """
        _SET_LABEL = """
            addLabelsToLabelable(input: {{
                labelableId: "{pr_id}",
                labelIds: "{label_id}"
            }}) {{
                clientMutationId
            }}
        """
        labels = []
        not_end = True
        query = _GET_LABEL.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            max_page_size=self._max_page_size,
            next="",
        )
        while not_end:
            result = self._run(query)["repository"]["labels"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _GET_LABEL.format(
                owner=self._owner,
                name=self._name,
                label_name=label_name,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )
            labels += [label for label in result["nodes"]]
        if not labels:
            return
        query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
        self._run(query, is_mutation=True)
    def _run(self, query, is_mutation=False):
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry
        def requests_retry_session(
            retries=3,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504),
            session=None,
        ):
            session = session or requests.Session()
            retry = Retry(
                total=retries,
                read=retries,
                connect=retries,
                backoff_factor=backoff_factor,
                status_forcelist=status_forcelist,
            )
            adapter = HTTPAdapter(max_retries=retry)
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            return session
        headers = {"Authorization": "bearer {}".format(self._token)}
        if is_mutation:
            query = """
            mutation {{
                {query}
            }}
            """.format(
                query=query
            )
        else:
            query = """
            query {{
                {query}
                rateLimit {{
                    cost
                    remaining
                }}
            }}
            """.format(
                query=query
            )
        while True:
            request = requests_retry_session().post(
                "https://api.github.com/graphql", json={"query": query}, headers=headers
            )
            if request.status_code == 200:
                result = request.json()
                if "errors" in result:
                    raise Exception(
                        "Errors occurred: {}\nOriginal query: {}".format(
                            result["errors"], query
                        )
                    )
                if not is_mutation:
                    import inspect
                    caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
                    if caller not in list(self.api_costs.keys()):
                        self.api_costs[caller] = 0
                    self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
                return result["data"]
            else:
                import json
                raise Exception(
                    "Query failed with code {code}:\n{json}".format(
                        code=request.status_code,
                        json=json.dumps(request.json(), indent=4),
                    )
                )
		`@ -0,0 +1 @@`
							00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000