Merge branch 'master' into is-secure-client-info

2024-11-25 00:52:02 +00:00 · 2022-03-30 11:26:05 +00:00 · 2022-03-30 11:26:05 +00:00 · 7bc5731025
commit 7bc5731025
parent 87bab4f2ea 31c367d3cd
17 changed files with 290 additions and 1401 deletions
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -149,7 +149,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH"
  SplitBuildSmokeTest:
    needs: [BuilderDebSplitted]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Set envs
@ -316,7 +315,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinRelease:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -362,7 +360,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinGCC:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -636,7 +633,6 @@ jobs:
 ##########################################################################################
  BuilderDebSplitted:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -682,7 +678,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinTidy:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -728,7 +723,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinDarwin:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -774,7 +768,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinAarch64:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -820,7 +813,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinFreeBSD:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -866,7 +858,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinDarwinAarch64:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
@ -912,7 +903,6 @@ jobs:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinPPC64:
    needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
--- a/src/Common/format.h
+++ b/src/Common/format.h
@ -0,0 +1,178 @@
+#pragma once
+
+#include <base/types.h>
+#include <Common/Exception.h>
+#include <Common/PODArray.h>
+#include <Common/StringUtils/StringUtils.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace Format
+{
+    using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
+
+    static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
+    {
+        res = 0;
+        for (UInt64 pos = l; pos < r; ++pos)
+        {
+            if (!isNumericASCII(description[pos]))
+                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
+            res = res * 10 + description[pos] - '0';
+            if (res >= argument_number)
+                throw Exception(
+                    "Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS);
+        }
+    }
+
+    static inline void init(
+        const String & pattern,
+        size_t argument_number,
+        const std::vector<std::optional<String>> & constant_strings,
+        IndexPositions & index_positions,
+        std::vector<String> & substrings)
+    {
+        /// Is current position after open curly brace.
+        bool is_open_curly = false;
+        /// The position of last open token.
+        size_t last_open = -1;
+
+        /// Is formatting in a plain {} token.
+        std::optional<bool> is_plain_numbering;
+        UInt64 index_if_plain = 0;
+
+        /// Left position of adding substrings, just to the closed brace position or the start of the string.
+        /// Invariant --- the start of substring is in this position.
+        size_t start_pos = 0;
+
+        /// A flag to decide whether we should glue the constant strings.
+        bool glue_to_next = false;
+
+        /// Handling double braces (escaping).
+        auto double_brace_removal = [](String & str)
+        {
+            size_t i = 0;
+            bool should_delete = true;
+            str.erase(
+                std::remove_if(
+                    str.begin(),
+                    str.end(),
+                    [&i, &should_delete, &str](char)
+                    {
+                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
+                        ++i;
+                        if (is_double_brace && should_delete)
+                        {
+                            should_delete = false;
+                            return true;
+                        }
+                        should_delete = true;
+                        return false;
+                    }),
+                str.end());
+        };
+
+        index_positions.emplace_back();
+
+        for (size_t i = 0; i < pattern.size(); ++i)
+        {
+            if (pattern[i] == '{')
+            {
+                /// Escaping handling
+                /// It is safe to access because of null termination
+                if (pattern[i + 1] == '{')
+                {
+                    ++i;
+                    continue;
+                }
+
+                if (is_open_curly)
+                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
+
+                String to_add = String(pattern.data() + start_pos, i - start_pos);
+                double_brace_removal(to_add);
+                if (!glue_to_next)
+                    substrings.emplace_back(to_add);
+                else
+                    substrings.back() += to_add;
+
+                glue_to_next = false;
+
+                is_open_curly = true;
+                last_open = i + 1;
+            }
+            else if (pattern[i] == '}')
+            {
+                if (pattern[i + 1] == '}')
+                {
+                    ++i;
+                    continue;
+                }
+
+                if (!is_open_curly)
+                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
+
+                is_open_curly = false;
+
+                if (last_open == i)
+                {
+                    if (is_plain_numbering && !*is_plain_numbering)
+                        throw Exception(
+                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
+                    is_plain_numbering = true;
+                    if (index_if_plain >= argument_number)
+                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
+                    index_positions.back() = index_if_plain++;
+                }
+                else
+                {
+                    if (is_plain_numbering && *is_plain_numbering)
+                        throw Exception(
+                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
+                    is_plain_numbering = false;
+
+                    UInt64 arg;
+                    parseNumber(pattern, last_open, i, arg, argument_number);
+
+                    if (arg >= argument_number)
+                        throw Exception(
+                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
+
+                    index_positions.back() = arg;
+                }
+
+                if (!constant_strings.empty() && constant_strings[index_positions.back()])
+                {
+                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
+                    glue_to_next = true;
+                    substrings.back() += *constant_strings[index_positions.back()];
+                }
+                else
+                    index_positions.emplace_back(); /// Otherwise we commit arg number and proceed.
+
+                start_pos = i + 1;
+            }
+        }
+
+        if (is_open_curly)
+            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
+
+        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
+        double_brace_removal(to_add);
+
+        if (!glue_to_next)
+            substrings.emplace_back(to_add);
+        else
+            substrings.back() += to_add;
+
+        index_positions.pop_back();
+    }
+}
+
+}
--- a/src/Functions/concat.cpp
+++ b/src/Functions/concat.cpp
@ -52,23 +52,21 @@ public:
    {
        if (arguments.size() < 2)
            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at least 2.",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        if (arguments.size() > FormatImpl::argument_threshold)
-            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
+                getName(),
+                arguments.size());

        for (const auto arg_idx : collections::range(0, arguments.size()))
        {
            const auto * arg = arguments[arg_idx].get();
            if (!isStringOrFixedString(arg))
-                throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function "
-                                    + getName(),
-                                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of argument {} of function {}",
+                    arg->getName(),
+                    arg_idx + 1,
+                    getName());
        }

        return std::make_shared<DataTypeString>();
@ -125,7 +123,7 @@ private:
        std::vector<const ColumnString::Chars *> data(num_arguments);
        std::vector<const ColumnString::Offsets *> offsets(num_arguments);
        std::vector<size_t> fixed_string_sizes(num_arguments);
-        std::vector<String> constant_strings(num_arguments);
+        std::vector<std::optional<String>> constant_strings(num_arguments);
        bool has_column_string = false;
        bool has_column_fixed_string = false;
        for (size_t i = 0; i < num_arguments; ++i)
--- a/src/Functions/formatString.cpp
+++ b/src/Functions/formatString.cpp
@ -45,25 +45,23 @@ public:

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
-        if (arguments.empty())
+        if (arguments.size() < 2)
            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at least 1",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        if (arguments.size() > FormatImpl::argument_threshold)
-            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
+                getName(),
+                arguments.size());

        for (const auto arg_idx : collections::range(0, arguments.size()))
        {
            const auto * arg = arguments[arg_idx].get();
            if (!isStringOrFixedString(arg))
                throw Exception(
-                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(),
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of argument {} of function {}",
+                    arg->getName(),
+                    arg_idx + 1,
+                    getName());
        }

        return std::make_shared<DataTypeString>();
@ -84,7 +82,7 @@ public:
        std::vector<const ColumnString::Chars *> data(arguments.size() - 1);
        std::vector<const ColumnString::Offsets *> offsets(arguments.size() - 1);
        std::vector<size_t> fixed_string_sizes(arguments.size() - 1);
-        std::vector<String> constant_strings(arguments.size() - 1);
+        std::vector<std::optional<String>> constant_strings(arguments.size() - 1);

        bool has_column_string = false;
        bool has_column_fixed_string = false;
--- a/src/Functions/formatString.h
+++ b/src/Functions/formatString.h
@ -4,8 +4,10 @@
 #include <base/types.h>
 #include <Common/Exception.h>
 #include <Common/StringUtils/StringUtils.h>
+#include <Common/format.h>
 #include <Common/memcpySmall.h>

+
 #include <algorithm>
 #include <optional>
 #include <string>
@ -15,15 +17,9 @@

 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}

 struct FormatImpl
 {
-    static constexpr size_t small_argument_threshold = 1024;
-    static constexpr size_t argument_threshold = std::numeric_limits<UInt32>::max();
    static constexpr size_t right_padding = 15;

    template <typename... Args>
@ -39,165 +35,10 @@ struct FormatImpl
            format<false, false>(std::forward<Args>(args)...);
    }

-    static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res)
-    {
-        res = 0;
-        for (UInt64 pos = l; pos < r; ++pos)
-        {
-            if (!isNumericASCII(description[pos]))
-                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
-            res = res * 10 + description[pos] - '0';
-            if (res >= argument_threshold)
-                throw Exception(
-                    "Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
-        }
-    }
-
-    static inline void init(
-        const String & pattern,
-        const std::vector<const ColumnString::Chars *> & data,
-        size_t argument_number,
-        const std::vector<String> & constant_strings,
-        UInt64 * index_positions_ptr,
-        std::vector<String> & substrings)
-    {
-        /// Is current position after open curly brace.
-        bool is_open_curly = false;
-        /// The position of last open token.
-        size_t last_open = -1;
-
-        /// Is formatting in a plain {} token.
-        std::optional<bool> is_plain_numbering;
-        UInt64 index_if_plain = 0;
-
-        /// Left position of adding substrings, just to the closed brace position or the start of the string.
-        /// Invariant --- the start of substring is in this position.
-        size_t start_pos = 0;
-
-        /// A flag to decide whether we should glue the constant strings.
-        bool glue_to_next = false;
-
-        /// Handling double braces (escaping).
-        auto double_brace_removal = [](String & str)
-        {
-            size_t i = 0;
-            bool should_delete = true;
-            str.erase(
-                std::remove_if(
-                    str.begin(),
-                    str.end(),
-                    [&i, &should_delete, &str](char)
-                    {
-                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
-                        ++i;
-                        if (is_double_brace && should_delete)
-                        {
-                            should_delete = false;
-                            return true;
-                        }
-                        should_delete = true;
-                        return false;
-                    }),
-                str.end());
-        };
-
-        for (size_t i = 0; i < pattern.size(); ++i)
-        {
-            if (pattern[i] == '{')
-            {
-                /// Escaping handling
-                /// It is safe to access because of null termination
-                if (pattern[i + 1] == '{')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (is_open_curly)
-                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                String to_add = String(pattern.data() + start_pos, i - start_pos);
-                double_brace_removal(to_add);
-                if (!glue_to_next)
-                    substrings.emplace_back(to_add);
-                else
-                    substrings.back() += to_add;
-
-                glue_to_next = false;
-
-                is_open_curly = true;
-                last_open = i + 1;
-            }
-            else if (pattern[i] == '}')
-            {
-                if (pattern[i + 1] == '}')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (!is_open_curly)
-                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                is_open_curly = false;
-
-                if (last_open == i)
-                {
-                    if (is_plain_numbering && !*is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = true;
-                    if (index_if_plain >= argument_number)
-                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
-                    *index_positions_ptr = index_if_plain++;
-                }
-                else
-                {
-                    if (is_plain_numbering && *is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = false;
-
-                    UInt64 arg;
-                    parseNumber(pattern, last_open, i, arg);
-
-                    if (arg >= argument_number)
-                        throw Exception(
-                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
-
-                    *index_positions_ptr = arg;
-                }
-
-                /// Constant string.
-                if (!data[*index_positions_ptr])
-                {
-                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
-                    glue_to_next = true;
-                    substrings.back() += constant_strings[*index_positions_ptr];
-                }
-                else
-                    ++index_positions_ptr; /// Otherwise we commit arg number and proceed.
-
-                start_pos = i + 1;
-            }
-        }
-
-        if (is_open_curly)
-            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
-
-        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
-        double_brace_removal(to_add);
-
-        if (!glue_to_next)
-            substrings.emplace_back(to_add);
-        else
-            substrings.back() += to_add;
-    }
-
    /// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string.
    /// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString.
    /// fixed_string_N for savings N to fixed strings.
-    /// constant_strings for constant strings. If data[i] is nullptr, than it is constant string.
+    /// constant_strings for constant strings. If data[i] is nullptr, it is constant string.
    /// res_data is result_data, res_offsets is offset result.
    /// input_rows_count is the number of rows processed.
    /// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
@ -207,29 +48,22 @@ struct FormatImpl
        const std::vector<const ColumnString::Chars *> & data,
        const std::vector<const ColumnString::Offsets *> & offsets,
        [[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector<size_t> & fixed_string_N,
-        const std::vector<String> & constant_strings,
+        const std::vector<std::optional<String>> & constant_strings,
        ColumnString::Chars & res_data,
        ColumnString::Offsets & res_offsets,
        size_t input_rows_count)
    {
        const size_t argument_number = offsets.size();

-        UInt64 small_index_positions_buffer[small_argument_threshold];
-        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices.
-        UInt64 * index_positions = small_index_positions_buffer;
-
-        std::unique_ptr<UInt64[]> big_index_positions_buffer;
-        if (argument_number > small_argument_threshold)
-        {
-            big_index_positions_buffer.reset(new UInt64[argument_number]);
-            index_positions = big_index_positions_buffer.get();
-        }
+        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this
+        /// array will be filled with [1, 3, 1, 0] but without constant string indices.
+        Format::IndexPositions index_positions;

        /// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation.
        /// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``].
        std::vector<String> substrings;

-        init(pattern, data, argument_number, constant_strings, index_positions, substrings);
+        Format::init(pattern, argument_number, constant_strings, index_positions, substrings);

        UInt64 final_size = 0;

@ -271,7 +105,7 @@ struct FormatImpl
                for (size_t j = 1; j < substrings.size(); ++j)
                {
                    UInt64 arg = index_positions[j - 1];
-                    auto offset_ptr = offsets[arg];
+                    const auto * offset_ptr = offsets[arg];
                    UInt64 arg_offset = 0;
                    UInt64 size = 0;

--- a/tests/ci/cherry_pick_utils/parser.py
+++ b/tests/ci/cherry_pick_utils/parser.py
@ -20,8 +20,6 @@ class Description:

    def __init__(self, pull_request):
        self.label_name = str()
-        self.legal = False
-
        self._parse(pull_request["bodyText"])

    def _parse(self, text):
@ -39,12 +37,6 @@ class Description:
                category = stripped
                next_category = False

-            if (
-                stripped
-                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
-            ):
-                self.legal = True
-
            category_headers = (
                "Category (leave one):",
                "Changelog category (leave one):",
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@ -59,3 +59,17 @@ def post_commit_status_to_file(file_path, description, state, report_url):
    with open(file_path, "w", encoding="utf-8") as f:
        out = csv.writer(f, delimiter="\t")
        out.writerow([state, report_url, description])
+
+
+def remove_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.remove_from_labels(label)
+
+
+def post_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.add_to_labels(label)
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@ -236,6 +236,15 @@ class PRInfo:
                return True
        return False

+    def has_changes_in_submodules(self):
+        if self.changed_files is None or not self.changed_files:
+            return True
+
+        for f in self.changed_files:
+            if "contrib" in f:
+                return True
+        return False
+
    def can_skip_builds_and_use_version_from_master(self):
        # TODO: See a broken loop
        if "force tests" in self.labels:
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@ -8,7 +8,7 @@ from github import Github
 from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
-from commit_status_helper import get_commit
+from commit_status_helper import get_commit, post_labels, remove_labels

 NAME = "Run Check (actions)"

@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
 CAN_BE_TESTED_LABEL = "can be tested"
 DO_NOT_TEST_LABEL = "do not test"
 FORCE_TESTS_LABEL = "force tests"
+SUBMODULE_CHANGED_LABEL = "submodule changed"

 # Individual trusted contirbutors who are not in any trusted organization.
 # Can be changed in runtime: we will append users that we learned to be in
@ -81,6 +82,25 @@ TRUSTED_CONTRIBUTORS = {
    ]
 }

+MAP_CATEGORY_TO_LABEL = {
+    "New Feature": "pr-feature",
+    "Bug Fix": "pr-bugfix",
+    "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix",
+    "Improvement": "pr-improvement",
+    "Performance Improvement": "pr-performance",
+    "Backward Incompatible Change": "pr-backward-incompatible",
+    "Build/Testing/Packaging Improvement": "pr-build",
+    "Build Improvement": "pr-build",
+    "Build/Testing Improvement": "pr-build",
+    "Build": "pr-build",
+    "Packaging Improvement": "pr-build",
+    "Not for changelog (changelog entry is not required)": "pr-not-for-changelog",
+    "Not for changelog": "pr-not-for-changelog",
+    "Documentation (changelog entry is not required)": "pr-documentation",
+    "Documentation": "pr-documentation",
+    # 'Other': doesn't match anything
+}
+

 def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
    if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
@ -168,7 +188,7 @@ def check_pr_description(pr_info):
                    + second_category
                    + "'"
                )
-                return result_status[:140]
+                return result_status[:140], category

        elif re.match(
            r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@ -190,30 +210,57 @@ def check_pr_description(pr_info):
            i += 1

    if not category:
-        return "Changelog category is empty"
+        return "Changelog category is empty", category

    # Filter out the PR categories that are not for changelog.
    if re.match(
        r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    ):
-        return ""
+        return "", category

    if not entry:
-        return f"Changelog entry required for category '{category}'"
+        return f"Changelog entry required for category '{category}'", category

-    return ""
+    return "", category


 if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

-    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
+    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
    can_run, description, labels_state = should_run_checks_for_pr(pr_info)
    gh = Github(get_best_robot_token())
    commit = get_commit(gh, pr_info.sha)

-    description_report = check_pr_description(pr_info)[:139]
+    description_report, category = check_pr_description(pr_info)
+    pr_labels_to_add = []
+    pr_labels_to_remove = []
+    if (
+        category in MAP_CATEGORY_TO_LABEL
+        and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels
+    ):
+        pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category])
+
+    for label in pr_info.labels:
+        if (
+            label in MAP_CATEGORY_TO_LABEL.values()
+            and category in MAP_CATEGORY_TO_LABEL
+            and label != MAP_CATEGORY_TO_LABEL[category]
+        ):
+            pr_labels_to_remove.append(label)
+
+    if pr_info.has_changes_in_submodules():
+        pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL)
+    elif SUBMODULE_CHANGED_LABEL in pr_info.labels:
+        pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL)
+
+    if pr_labels_to_add:
+        post_labels(gh, pr_info, pr_labels_to_add)
+
+    if pr_labels_to_remove:
+        remove_labels(gh, pr_info, pr_labels_to_remove)
+
    if description_report:
        print("::notice ::Cannot run, description does not match the template")
        logging.info(
@ -225,7 +272,7 @@ if __name__ == "__main__":
        )
        commit.create_status(
            context=NAME,
-            description=description_report,
+            description=description_report[:139],
            state="failure",
            target_url=url,
        )
--- a/tests/queries/0_stateless/02245_format_string_stack_overflow.reference
+++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.reference
@ -0,0 +1 @@
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
--- a/tests/queries/0_stateless/02245_format_string_stack_overflow.sql
+++ b/tests/queries/0_stateless/02245_format_string_stack_overflow.sql
--- a/utils/github/init.py
+++ b/utils/github/init.py
@ -1 +0,0 @@
-# -*- coding: utf-8 -*-
--- a/utils/github/backport.py
+++ b/utils/github/backport.py
@ -1,185 +0,0 @@
-# -*- coding: utf-8 -*-
-
-try:
-    from clickhouse.utils.github.cherrypick import CherryPick
-    from clickhouse.utils.github.query import Query as RemoteRepo
-    from clickhouse.utils.github.local import Repository as LocalRepo
-except:
-    from .cherrypick import CherryPick
-    from .query import Query as RemoteRepo
-    from .local import Repository as LocalRepo
-
-import argparse
-import logging
-import re
-import sys
-
-
-class Backport:
-    def __init__(self, token, owner, name, team):
-        self._gh = RemoteRepo(
-            token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
-        )
-        self._token = token
-        self.default_branch_name = self._gh.default_branch
-        self.ssh_url = self._gh.ssh_url
-
-    def getPullRequests(self, from_commit):
-        return self._gh.get_pull_requests(from_commit)
-
-    def getBranchesWithRelease(self):
-        branches = set()
-        for pull_request in self._gh.find_pull_requests("release"):
-            branches.add(pull_request["headRefName"])
-        return branches
-
-    def execute(self, repo, upstream, until_commit, run_cherrypick):
-        repo = LocalRepo(repo, upstream, self.default_branch_name)
-        all_branches = repo.get_release_branches()  # [(branch_name, base_commit)]
-
-        release_branches = self.getBranchesWithRelease()
-
-        branches = []
-        # iterate over all branches to preserve their precedence.
-        for branch in all_branches:
-            if branch[0] in release_branches:
-                branches.append(branch)
-
-        if not branches:
-            logging.info("No release branches found!")
-            return
-
-        for branch in branches:
-            logging.info("Found release branch: %s", branch[0])
-
-        if not until_commit:
-            until_commit = branches[0][1]
-        pull_requests = self.getPullRequests(until_commit)
-
-        backport_map = {}
-
-        RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
-        RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
-        RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
-
-        # pull-requests are sorted by ancestry from the most recent.
-        for pr in pull_requests:
-            while repo.comparator(branches[-1][1]) >= repo.comparator(
-                pr["mergeCommit"]["oid"]
-            ):
-                logging.info(
-                    "PR #{} is already inside {}. Dropping this branch for further PRs".format(
-                        pr["number"], branches[-1][0]
-                    )
-                )
-                branches.pop()
-
-            logging.info("Processing PR #{}".format(pr["number"]))
-
-            assert len(branches)
-
-            branch_set = set([branch[0] for branch in branches])
-
-            # First pass. Find all must-backports
-            for label in pr["labels"]["nodes"]:
-                if label["name"] == "pr-must-backport":
-                    backport_map[pr["number"]] = branch_set.copy()
-                    continue
-                matched = RE_MUST_BACKPORT.match(label["name"])
-                if matched:
-                    if pr["number"] not in backport_map:
-                        backport_map[pr["number"]] = set()
-                    backport_map[pr["number"]].add(matched.group(1))
-
-            # Second pass. Find all no-backports
-            for label in pr["labels"]["nodes"]:
-                if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
-                    del backport_map[pr["number"]]
-                    break
-                matched_no_backport = RE_NO_BACKPORT.match(label["name"])
-                matched_backported = RE_BACKPORTED.match(label["name"])
-                if (
-                    matched_no_backport
-                    and pr["number"] in backport_map
-                    and matched_no_backport.group(1) in backport_map[pr["number"]]
-                ):
-                    backport_map[pr["number"]].remove(matched_no_backport.group(1))
-                    logging.info(
-                        "\tskipping %s because of forced no-backport",
-                        matched_no_backport.group(1),
-                    )
-                elif (
-                    matched_backported
-                    and pr["number"] in backport_map
-                    and matched_backported.group(1) in backport_map[pr["number"]]
-                ):
-                    backport_map[pr["number"]].remove(matched_backported.group(1))
-                    logging.info(
-                        "\tskipping %s because it's already backported manually",
-                        matched_backported.group(1),
-                    )
-
-        for pr, branches in list(backport_map.items()):
-            logging.info("PR #%s needs to be backported to:", pr)
-            for branch in branches:
-                logging.info(
-                    "\t%s, and the status is: %s",
-                    branch,
-                    run_cherrypick(self._token, pr, branch),
-                )
-
-        # print API costs
-        logging.info("\nGitHub API total costs per query:")
-        for name, value in list(self._gh.api_costs.items()):
-            logging.info("%s : %s", name, value)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--token", type=str, required=True, help="token for Github access"
-    )
-    parser.add_argument(
-        "--repo",
-        type=str,
-        required=True,
-        help="path to full repository",
-        metavar="PATH",
-    )
-    parser.add_argument(
-        "--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="do not create or merge any PRs",
-        default=False,
-    )
-    parser.add_argument(
-        "--verbose",
-        "-v",
-        action="store_true",
-        help="more verbose output",
-        default=False,
-    )
-    parser.add_argument(
-        "--upstream",
-        "-u",
-        type=str,
-        help="remote name of upstream in repository",
-        default="origin",
-    )
-    args = parser.parse_args()
-
-    if args.verbose:
-        logging.basicConfig(
-            format="%(message)s", stream=sys.stdout, level=logging.DEBUG
-        )
-    else:
-        logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
-
-    cherrypick_run = lambda token, pr, branch: CherryPick(
-        token, "ClickHouse", "ClickHouse", "core", pr, branch
-    ).execute(args.repo, args.dry_run)
-    bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
-    bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
--- a/utils/github/cherrypick.py
+++ b/utils/github/cherrypick.py
@ -1,323 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Backports changes from PR to release branch.
-Requires multiple separate runs as part of the implementation.
-
-First run should do the following:
-1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
-2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
-3. Create PR from temporary branch to backport branch (emulating cherry-pick).
-
-Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
-
-Third run creates PR from backport branch (with merged previous PR) to release branch.
-"""
-
-try:
-    from clickhouse.utils.github.query import Query as RemoteRepo
-except:
-    from .query import Query as RemoteRepo
-
-import argparse
-from enum import Enum
-import logging
-import os
-import subprocess
-import sys
-
-
-class CherryPick:
-    class Status(Enum):
-        DISCARDED = "discarded"
-        NOT_INITIATED = "not started"
-        FIRST_MERGEABLE = "waiting for 1st stage"
-        FIRST_CONFLICTS = "conflicts on 1st stage"
-        SECOND_MERGEABLE = "waiting for 2nd stage"
-        SECOND_CONFLICTS = "conflicts on 2nd stage"
-        MERGED = "backported"
-
-    def _run(self, args):
-        out = subprocess.check_output(args).rstrip()
-        logging.debug(out)
-        return out
-
-    def __init__(self, token, owner, name, team, pr_number, target_branch):
-        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
-        self._pr = self._gh.get_pull_request(pr_number)
-
-        self.ssh_url = self._gh.ssh_url
-
-        # TODO: check if pull-request is merged.
-
-        self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
-
-        self.target_branch = target_branch
-        self.backport_branch = "backport/{branch}/{pr}".format(
-            branch=target_branch, pr=pr_number
-        )
-        self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
-            branch=target_branch, oid=self.merge_commit_oid
-        )
-
-    def getCherryPickPullRequest(self):
-        return self._gh.find_pull_request(
-            base=self.backport_branch, head=self.cherrypick_branch
-        )
-
-    def createCherryPickPullRequest(self, repo_path):
-        DESCRIPTION = (
-            "This pull-request is a first step of an automated backporting.\n"
-            "It contains changes like after calling a local command `git cherry-pick`.\n"
-            "If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
-            "Otherwise, if you do not want to backport them, then just close this pull-request.\n"
-            "\n"
-            "The check results does not matter at this step - you can safely ignore them.\n"
-            "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
-        )
-
-        # FIXME: replace with something better than os.system()
-        git_prefix = [
-            "git",
-            "-C",
-            repo_path,
-            "-c",
-            "user.email=robot-clickhouse@yandex-team.ru",
-            "-c",
-            "user.name=robot-clickhouse",
-        ]
-        base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
-
-        # Create separate branch for backporting, and make it look like real cherry-pick.
-        self._run(git_prefix + ["checkout", "-f", self.target_branch])
-        self._run(git_prefix + ["checkout", "-B", self.backport_branch])
-        self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
-
-        # Create secondary branch to allow pull request with cherry-picked commit.
-        self._run(
-            git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
-        )
-
-        self._run(
-            git_prefix
-            + [
-                "push",
-                "-f",
-                "origin",
-                "{branch}:{branch}".format(branch=self.backport_branch),
-            ]
-        )
-        self._run(
-            git_prefix
-            + [
-                "push",
-                "-f",
-                "origin",
-                "{branch}:{branch}".format(branch=self.cherrypick_branch),
-            ]
-        )
-
-        # Create pull-request like a local cherry-pick
-        pr = self._gh.create_pull_request(
-            source=self.cherrypick_branch,
-            target=self.backport_branch,
-            title="Cherry pick #{number} to {target}: {title}".format(
-                number=self._pr["number"],
-                target=self.target_branch,
-                title=self._pr["title"].replace('"', '\\"'),
-            ),
-            description="Original pull-request #{}\n\n{}".format(
-                self._pr["number"], DESCRIPTION
-            ),
-        )
-
-        # FIXME: use `team` to leave a single eligible assignee.
-        self._gh.add_assignee(pr, self._pr["author"])
-        self._gh.add_assignee(pr, self._pr["mergedBy"])
-
-        self._gh.set_label(pr, "do not test")
-        self._gh.set_label(pr, "pr-cherrypick")
-
-        return pr
-
-    def mergeCherryPickPullRequest(self, cherrypick_pr):
-        return self._gh.merge_pull_request(cherrypick_pr["id"])
-
-    def getBackportPullRequest(self):
-        return self._gh.find_pull_request(
-            base=self.target_branch, head=self.backport_branch
-        )
-
-    def createBackportPullRequest(self, cherrypick_pr, repo_path):
-        DESCRIPTION = (
-            "This pull-request is a last step of an automated backporting.\n"
-            "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
-            "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
-        )
-
-        git_prefix = [
-            "git",
-            "-C",
-            repo_path,
-            "-c",
-            "user.email=robot-clickhouse@clickhouse.com",
-            "-c",
-            "user.name=robot-clickhouse",
-        ]
-
-        pr_title = "Backport #{number} to {target}: {title}".format(
-            number=self._pr["number"],
-            target=self.target_branch,
-            title=self._pr["title"].replace('"', '\\"'),
-        )
-
-        self._run(git_prefix + ["checkout", "-f", self.backport_branch])
-        self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
-        self._run(
-            git_prefix
-            + [
-                "reset",
-                "--soft",
-                self._run(
-                    git_prefix
-                    + [
-                        "merge-base",
-                        "origin/" + self.target_branch,
-                        self.backport_branch,
-                    ]
-                ),
-            ]
-        )
-        self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
-        self._run(
-            git_prefix
-            + [
-                "push",
-                "-f",
-                "origin",
-                "{branch}:{branch}".format(branch=self.backport_branch),
-            ]
-        )
-
-        pr = self._gh.create_pull_request(
-            source=self.backport_branch,
-            target=self.target_branch,
-            title=pr_title,
-            description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
-                self._pr["number"], cherrypick_pr["number"], DESCRIPTION
-            ),
-        )
-
-        # FIXME: use `team` to leave a single eligible assignee.
-        self._gh.add_assignee(pr, self._pr["author"])
-        self._gh.add_assignee(pr, self._pr["mergedBy"])
-
-        self._gh.set_label(pr, "pr-backport")
-
-        return pr
-
-    def execute(self, repo_path, dry_run=False):
-        pr1 = self.getCherryPickPullRequest()
-        if not pr1:
-            if not dry_run:
-                pr1 = self.createCherryPickPullRequest(repo_path)
-                logging.debug(
-                    "Created PR with cherry-pick of %s to %s: %s",
-                    self._pr["number"],
-                    self.target_branch,
-                    pr1["url"],
-                )
-            else:
-                return CherryPick.Status.NOT_INITIATED
-        else:
-            logging.debug(
-                "Found PR with cherry-pick of %s to %s: %s",
-                self._pr["number"],
-                self.target_branch,
-                pr1["url"],
-            )
-
-        if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
-            if not dry_run:
-                pr1 = self.mergeCherryPickPullRequest(pr1)
-                logging.debug(
-                    "Merged PR with cherry-pick of %s to %s: %s",
-                    self._pr["number"],
-                    self.target_branch,
-                    pr1["url"],
-                )
-
-        if not pr1["merged"]:
-            logging.debug(
-                "Waiting for PR with cherry-pick of %s to %s: %s",
-                self._pr["number"],
-                self.target_branch,
-                pr1["url"],
-            )
-
-            if pr1["closed"]:
-                return CherryPick.Status.DISCARDED
-            elif pr1["mergeable"] == "CONFLICTING":
-                return CherryPick.Status.FIRST_CONFLICTS
-            else:
-                return CherryPick.Status.FIRST_MERGEABLE
-
-        pr2 = self.getBackportPullRequest()
-        if not pr2:
-            if not dry_run:
-                pr2 = self.createBackportPullRequest(pr1, repo_path)
-                logging.debug(
-                    "Created PR with backport of %s to %s: %s",
-                    self._pr["number"],
-                    self.target_branch,
-                    pr2["url"],
-                )
-            else:
-                return CherryPick.Status.FIRST_MERGEABLE
-        else:
-            logging.debug(
-                "Found PR with backport of %s to %s: %s",
-                self._pr["number"],
-                self.target_branch,
-                pr2["url"],
-            )
-
-        if pr2["merged"]:
-            return CherryPick.Status.MERGED
-        elif pr2["closed"]:
-            return CherryPick.Status.DISCARDED
-        elif pr2["mergeable"] == "CONFLICTING":
-            return CherryPick.Status.SECOND_CONFLICTS
-        else:
-            return CherryPick.Status.SECOND_MERGEABLE
-
-
-if __name__ == "__main__":
-    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--token", "-t", type=str, required=True, help="token for Github access"
-    )
-    parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
-    parser.add_argument(
-        "--branch",
-        "-b",
-        type=str,
-        required=True,
-        help="target branch name for cherry-pick",
-    )
-    parser.add_argument(
-        "--repo",
-        "-r",
-        type=str,
-        required=True,
-        help="path to full repository",
-        metavar="PATH",
-    )
-    args = parser.parse_args()
-
-    cp = CherryPick(
-        args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
-    )
-    cp.execute(args.repo)
--- a/utils/github/local.py
+++ b/utils/github/local.py
@ -1,108 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import functools
-import logging
-import os
-import re
-
-
-class RepositoryBase:
-    def __init__(self, repo_path):
-        import git
-
-        self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
-
-        # comparator of commits
-        def cmp(x, y):
-            if str(x) == str(y):
-                return 0
-            if self._repo.is_ancestor(x, y):
-                return -1
-            else:
-                return 1
-
-        self.comparator = functools.cmp_to_key(cmp)
-
-    def get_head_commit(self):
-        return self._repo.commit(self._default)
-
-    def iterate(self, begin, end):
-        rev_range = "{}...{}".format(begin, end)
-        for commit in self._repo.iter_commits(rev_range, first_parent=True):
-            yield commit
-
-
-class Repository(RepositoryBase):
-    def __init__(self, repo_path, remote_name, default_branch_name):
-        super(Repository, self).__init__(repo_path)
-        self._remote = self._repo.remotes[remote_name]
-        self._remote.fetch()
-        self._default = self._remote.refs[default_branch_name]
-
-    def get_release_branches(self):
-        """
-        Returns sorted list of tuples:
-         * remote branch (git.refs.remote.RemoteReference),
-         * base commit (git.Commit),
-         * head (git.Commit)).
-        List is sorted by commits in ascending order.
-        """
-        release_branches = []
-
-        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
-
-        for branch in [
-            r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
-        ]:
-            base = self._repo.merge_base(self._default, self._repo.commit(branch))
-            if not base:
-                logging.info(
-                    "Branch %s is not based on branch %s. Ignoring.",
-                    branch.path,
-                    self._default,
-                )
-            elif len(base) > 1:
-                logging.info(
-                    "Branch %s has more than one base commit. Ignoring.", branch.path
-                )
-            else:
-                release_branches.append((os.path.basename(branch.name), base[0]))
-
-        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
-
-
-class BareRepository(RepositoryBase):
-    def __init__(self, repo_path, default_branch_name):
-        super(BareRepository, self).__init__(repo_path)
-        self._default = self._repo.branches[default_branch_name]
-
-    def get_release_branches(self):
-        """
-        Returns sorted list of tuples:
-         * branch (git.refs.head?),
-         * base commit (git.Commit),
-         * head (git.Commit)).
-        List is sorted by commits in ascending order.
-        """
-        release_branches = []
-
-        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
-
-        for branch in [
-            r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
-        ]:
-            base = self._repo.merge_base(self._default, self._repo.commit(branch))
-            if not base:
-                logging.info(
-                    "Branch %s is not based on branch %s. Ignoring.",
-                    branch.path,
-                    self._default,
-                )
-            elif len(base) > 1:
-                logging.info(
-                    "Branch %s has more than one base commit. Ignoring.", branch.path
-                )
-            else:
-                release_branches.append((os.path.basename(branch.name), base[0]))
-
-        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
--- a/utils/github/parser.py
+++ b/utils/github/parser.py
@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-
-
-class Description:
-    """Parsed description representation"""
-
-    MAP_CATEGORY_TO_LABEL = {
-        "New Feature": "pr-feature",
-        "Bug Fix": "pr-bugfix",
-        "Improvement": "pr-improvement",
-        "Performance Improvement": "pr-performance",
-        # 'Backward Incompatible Change': doesn't match anything
-        "Build/Testing/Packaging Improvement": "pr-build",
-        "Non-significant (changelog entry is not needed)": "pr-non-significant",
-        "Non-significant (changelog entry is not required)": "pr-non-significant",
-        "Non-significant": "pr-non-significant",
-        "Documentation (changelog entry is not required)": "pr-documentation",
-        # 'Other': doesn't match anything
-    }
-
-    def __init__(self, pull_request):
-        self.label_name = str()
-        self.legal = False
-
-        self._parse(pull_request["bodyText"])
-
-    def _parse(self, text):
-        lines = text.splitlines()
-        next_category = False
-        category = str()
-
-        for line in lines:
-            stripped = line.strip()
-
-            if not stripped:
-                continue
-
-            if next_category:
-                category = stripped
-                next_category = False
-
-            if (
-                stripped
-                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
-            ):
-                self.legal = True
-
-            category_headers = (
-                "Category (leave one):",
-                "Changelog category (leave one):",
-                "Changelog category:",
-                "Category:",
-            )
-
-            if stripped in category_headers:
-                next_category = True
-
-        if category in Description.MAP_CATEGORY_TO_LABEL:
-            self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
-        else:
-            if not category:
-                print("Cannot find category in pr description")
-            else:
-                print(("Unknown category: " + category))
--- a/utils/github/query.py
+++ b/utils/github/query.py
@ -1,492 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import requests
-
-
-class Query:
-    """
-    Implements queries to the Github API using GraphQL
-    """
-
-    _PULL_REQUEST = """
-        author {{
-            ... on User {{
-                id
-                login
-            }}
-        }}
-
-        baseRepository {{
-            nameWithOwner
-        }}
-
-        mergeCommit {{
-            oid
-            parents(first: {min_page_size}) {{
-                totalCount
-                nodes {{
-                    oid
-                }}
-            }}
-        }}
-
-        mergedBy {{
-            ... on User {{
-                id
-                login
-            }}
-        }}
-
-        baseRefName
-        closed
-        headRefName
-        id
-        mergeable
-        merged
-        number
-        title
-        url
-    """
-
-    def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
-        self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
-
-        self._token = token
-        self._owner = owner
-        self._name = name
-        self._team = team
-
-        self._max_page_size = max_page_size
-        self._min_page_size = min_page_size
-
-        self.api_costs = {}
-
-        repo = self.get_repository()
-        self._id = repo["id"]
-        self.ssh_url = repo["sshUrl"]
-        self.default_branch = repo["defaultBranchRef"]["name"]
-
-        self.members = set(self.get_members())
-
-    def get_repository(self):
-        _QUERY = """
-            repository(owner: "{owner}" name: "{name}") {{
-                defaultBranchRef {{
-                    name
-                }}
-                id
-                sshUrl
-            }}
-        """
-
-        query = _QUERY.format(owner=self._owner, name=self._name)
-        return self._run(query)["repository"]
-
-    def get_members(self):
-        """Get all team members for organization
-
-        Returns:
-            members: a map of members' logins to ids
-        """
-
-        _QUERY = """
-            organization(login: "{organization}") {{
-                team(slug: "{team}") {{
-                    members(first: {max_page_size} {next}) {{
-                        pageInfo {{
-                            hasNextPage
-                            endCursor
-                        }}
-                        nodes {{
-                            id
-                            login
-                        }}
-                    }}
-                }}
-            }}
-        """
-
-        members = {}
-        not_end = True
-        query = _QUERY.format(
-            organization=self._owner,
-            team=self._team,
-            max_page_size=self._max_page_size,
-            next="",
-        )
-
-        while not_end:
-            result = self._run(query)["organization"]["team"]
-            if result is None:
-                break
-            result = result["members"]
-            not_end = result["pageInfo"]["hasNextPage"]
-            query = _QUERY.format(
-                organization=self._owner,
-                team=self._team,
-                max_page_size=self._max_page_size,
-                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
-            )
-
-            members += dict([(node["login"], node["id"]) for node in result["nodes"]])
-
-        return members
-
-    def get_pull_request(self, number):
-        _QUERY = """
-            repository(owner: "{owner}" name: "{name}") {{
-                pullRequest(number: {number}) {{
-                    {pull_request_data}
-                }}
-            }}
-        """
-
-        query = _QUERY.format(
-            owner=self._owner,
-            name=self._name,
-            number=number,
-            pull_request_data=self._PULL_REQUEST,
-            min_page_size=self._min_page_size,
-        )
-        return self._run(query)["repository"]["pullRequest"]
-
-    def find_pull_request(self, base, head):
-        _QUERY = """
-            repository(owner: "{owner}" name: "{name}") {{
-                pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
-                    nodes {{
-                        {pull_request_data}
-                    }}
-                    totalCount
-                }}
-            }}
-        """
-
-        query = _QUERY.format(
-            owner=self._owner,
-            name=self._name,
-            base=base,
-            head=head,
-            pull_request_data=self._PULL_REQUEST,
-            min_page_size=self._min_page_size,
-        )
-        result = self._run(query)["repository"]["pullRequests"]
-        if result["totalCount"] > 0:
-            return result["nodes"][0]
-        else:
-            return {}
-
-    def find_pull_requests(self, label_name):
-        """
-        Get all pull-requests filtered by label name
-        """
-        _QUERY = """
-            repository(owner: "{owner}" name: "{name}") {{
-                pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
-                    nodes {{
-                        {pull_request_data}
-                    }}
-                }}
-            }}
-        """
-
-        query = _QUERY.format(
-            owner=self._owner,
-            name=self._name,
-            label_name=label_name,
-            pull_request_data=self._PULL_REQUEST,
-            min_page_size=self._min_page_size,
-        )
-        return self._run(query)["repository"]["pullRequests"]["nodes"]
-
-    def get_pull_requests(self, before_commit):
-        """
-        Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
-        """
-
-        _QUERY = """
-            repository(owner: "{owner}" name: "{name}") {{
-                defaultBranchRef {{
-                    target {{
-                        ... on Commit {{
-                            history(first: {max_page_size} {next}) {{
-                                pageInfo {{
-                                    hasNextPage
-                                    endCursor
-                                }}
-                                nodes {{
-                                    oid
-                                    associatedPullRequests(first: {min_page_size}) {{
-                                        totalCount
-                                        nodes {{
-                                            ... on PullRequest {{
-                                                {pull_request_data}
-
-                                                labels(first: {min_page_size}) {{
-                                                    totalCount
-                                                    pageInfo {{
-                                                        hasNextPage
-                                                        endCursor
-                                                    }}
-                                                    nodes {{
-                                                        name
-                                                        color
-                                                    }}
-                                                }}
-                                            }}
-                                        }}
-                                    }}
-                                }}
-                            }}
-                        }}
-                    }}
-                }}
-            }}
-        """
-
-        pull_requests = []
-        not_end = True
-        query = _QUERY.format(
-            owner=self._owner,
-            name=self._name,
-            max_page_size=self._max_page_size,
-            min_page_size=self._min_page_size,
-            pull_request_data=self._PULL_REQUEST,
-            next="",
-        )
-
-        while not_end:
-            result = self._run(query)["repository"]["defaultBranchRef"]["target"][
-                "history"
-            ]
-            not_end = result["pageInfo"]["hasNextPage"]
-            query = _QUERY.format(
-                owner=self._owner,
-                name=self._name,
-                max_page_size=self._max_page_size,
-                min_page_size=self._min_page_size,
-                pull_request_data=self._PULL_REQUEST,
-                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
-            )
-
-            for commit in result["nodes"]:
-                # FIXME: maybe include `before_commit`?
-                if str(commit["oid"]) == str(before_commit):
-                    not_end = False
-                    break
-
-                # TODO: fetch all pull-requests that were merged in a single commit.
-                assert (
-                    commit["associatedPullRequests"]["totalCount"]
-                    <= self._min_page_size
-                )
-
-                for pull_request in commit["associatedPullRequests"]["nodes"]:
-                    if (
-                        pull_request["baseRepository"]["nameWithOwner"]
-                        == "{}/{}".format(self._owner, self._name)
-                        and pull_request["baseRefName"] == self.default_branch
-                        and pull_request["mergeCommit"]["oid"] == commit["oid"]
-                    ):
-                        pull_requests.append(pull_request)
-
-        return pull_requests
-
-    def create_pull_request(
-        self, source, target, title, description="", draft=False, can_modify=True
-    ):
-        _QUERY = """
-            createPullRequest(input: {{
-                baseRefName: "{target}",
-                headRefName: "{source}",
-                repositoryId: "{id}",
-                title: "{title}",
-                body: "{body}",
-                draft: {draft},
-                maintainerCanModify: {modify}
-            }}) {{
-                pullRequest {{
-                    {pull_request_data}
-                }}
-            }}
-        """
-
-        query = _QUERY.format(
-            target=target,
-            source=source,
-            id=self._id,
-            title=title,
-            body=description,
-            draft="true" if draft else "false",
-            modify="true" if can_modify else "false",
-            pull_request_data=self._PULL_REQUEST,
-        )
-        return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
-
-    def merge_pull_request(self, id):
-        _QUERY = """
-            mergePullRequest(input: {{
-                pullRequestId: "{id}"
-            }}) {{
-                pullRequest {{
-                    {pull_request_data}
-                }}
-            }}
-        """
-
-        query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
-        return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
-
-    # FIXME: figure out how to add more assignees at once
-    def add_assignee(self, pr, assignee):
-        _QUERY = """
-            addAssigneesToAssignable(input: {{
-                assignableId: "{id1}",
-                assigneeIds: "{id2}"
-            }}) {{
-                clientMutationId
-            }}
-        """
-
-        query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
-        self._run(query, is_mutation=True)
-
-    def set_label(self, pull_request, label_name):
-        """
-        Set label by name to the pull request
-
-        Args:
-            pull_request: JSON object returned by `get_pull_requests()`
-            label_name (string): label name
-        """
-
-        _GET_LABEL = """
-            repository(owner: "{owner}" name: "{name}") {{
-                labels(first: {max_page_size} {next} query: "{label_name}") {{
-                    pageInfo {{
-                        hasNextPage
-                        endCursor
-                    }}
-                    nodes {{
-                        id
-                        name
-                        color
-                    }}
-                }}
-            }}
-        """
-
-        _SET_LABEL = """
-            addLabelsToLabelable(input: {{
-                labelableId: "{pr_id}",
-                labelIds: "{label_id}"
-            }}) {{
-                clientMutationId
-            }}
-        """
-
-        labels = []
-        not_end = True
-        query = _GET_LABEL.format(
-            owner=self._owner,
-            name=self._name,
-            label_name=label_name,
-            max_page_size=self._max_page_size,
-            next="",
-        )
-
-        while not_end:
-            result = self._run(query)["repository"]["labels"]
-            not_end = result["pageInfo"]["hasNextPage"]
-            query = _GET_LABEL.format(
-                owner=self._owner,
-                name=self._name,
-                label_name=label_name,
-                max_page_size=self._max_page_size,
-                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
-            )
-
-            labels += [label for label in result["nodes"]]
-
-        if not labels:
-            return
-
-        query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
-        self._run(query, is_mutation=True)
-
-    def _run(self, query, is_mutation=False):
-        from requests.adapters import HTTPAdapter
-        from urllib3.util.retry import Retry
-
-        def requests_retry_session(
-            retries=3,
-            backoff_factor=0.3,
-            status_forcelist=(500, 502, 504),
-            session=None,
-        ):
-            session = session or requests.Session()
-            retry = Retry(
-                total=retries,
-                read=retries,
-                connect=retries,
-                backoff_factor=backoff_factor,
-                status_forcelist=status_forcelist,
-            )
-            adapter = HTTPAdapter(max_retries=retry)
-            session.mount("http://", adapter)
-            session.mount("https://", adapter)
-            return session
-
-        headers = {"Authorization": "bearer {}".format(self._token)}
-        if is_mutation:
-            query = """
-            mutation {{
-                {query}
-            }}
-            """.format(
-                query=query
-            )
-        else:
-            query = """
-            query {{
-                {query}
-                rateLimit {{
-                    cost
-                    remaining
-                }}
-            }}
-            """.format(
-                query=query
-            )
-
-        while True:
-            request = requests_retry_session().post(
-                "https://api.github.com/graphql", json={"query": query}, headers=headers
-            )
-            if request.status_code == 200:
-                result = request.json()
-                if "errors" in result:
-                    raise Exception(
-                        "Errors occurred: {}\nOriginal query: {}".format(
-                            result["errors"], query
-                        )
-                    )
-
-                if not is_mutation:
-                    import inspect
-
-                    caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
-                    if caller not in list(self.api_costs.keys()):
-                        self.api_costs[caller] = 0
-                    self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
-
-                return result["data"]
-            else:
-                import json
-
-                raise Exception(
-                    "Query failed with code {code}:\n{json}".format(
-                        code=request.status_code,
-                        json=json.dumps(request.json(), indent=4),
-                    )
-                )
				`@ -0,0 +1 @@`
				00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000