Add even more python code to pylint check, fix lambdas

This commit is contained in:
Mikhail f. Shiryaev 2024-02-29 02:21:39 +01:00
parent 34bb40583c
commit d2f92483f2
No known key found for this signature in database
GPG Key ID: 4B02ED204C7D93F4
11 changed files with 75 additions and 69 deletions

View File

@ -12,7 +12,8 @@ max-statements=200
[tool.pylint.'MESSAGES CONTROL']
# pytest.mark.parametrize is not callable (not-callable)
disable = '''missing-docstring,
disable = '''
missing-docstring,
too-few-public-methods,
invalid-name,
too-many-arguments,

View File

@ -51,7 +51,7 @@ class Queue:
label: str
def get_scales(runner_type: str) -> Tuple[int, int]:
def get_scales() -> Tuple[int, int]:
"returns the multipliers for scaling down and up ASG by types"
# Scaling down is quicker on the lack of running jobs than scaling up on
# queue
@ -95,7 +95,7 @@ def set_capacity(
continue
raise ValueError("Queue status is not in ['in_progress', 'queued']")
scale_down, scale_up = get_scales(runner_type)
scale_down, scale_up = get_scales()
# With lifecycle hooks some instances are actually free because some of
# them are in 'Terminating:Wait' state
effective_capacity = max(

View File

@ -1,16 +1,15 @@
#!/usr/bin/env python3
import json
import re
import time
from base64 import b64decode
from collections import namedtuple
from queue import Queue
from threading import Thread
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional
import requests
from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description
from lambda_shared.pr import check_pr_description
from lambda_shared.token import get_cached_access_token
NEED_RERUN_OR_CANCELL_WORKFLOWS = {
@ -48,16 +47,18 @@ class Worker(Thread):
def _exec_get_with_retry(url: str, token: str) -> dict:
headers = {"Authorization": f"token {token}"}
e = Exception()
for i in range(MAX_RETRY):
try:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
return response.json() # type: ignore
except Exception as ex:
print("Got exception executing request", ex)
e = ex
time.sleep(i + 1)
raise Exception("Cannot execute GET request with retries")
raise requests.HTTPError("Cannot execute GET request with retries") from e
WorkflowDescription = namedtuple(
@ -215,16 +216,18 @@ def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescripti
def _exec_post_with_retry(url: str, token: str, json: Optional[Any] = None) -> Any:
headers = {"Authorization": f"token {token}"}
e = Exception()
for i in range(MAX_RETRY):
try:
response = requests.post(url, headers=headers, json=json)
response = requests.post(url, headers=headers, json=json, timeout=30)
response.raise_for_status()
return response.json()
except Exception as ex:
print("Got exception executing request", ex)
e = ex
time.sleep(i + 1)
raise Exception("Cannot execute POST request with retry")
raise requests.HTTPError("Cannot execute POST request with retry") from e
def exec_workflow_url(urls_to_post, token):

View File

@ -8,23 +8,14 @@ Lambda function to:
import argparse
import sys
from datetime import datetime
from typing import Dict, List
from typing import Dict
import requests
import boto3 # type: ignore
from botocore.exceptions import ClientError # type: ignore
from lambda_shared import (
RUNNER_TYPE_LABELS,
RunnerDescription,
RunnerDescriptions,
list_runners,
)
from lambda_shared import RUNNER_TYPE_LABELS, RunnerDescriptions, list_runners
from lambda_shared.token import (
get_access_token_by_key_app,
get_cached_access_token,
get_key_and_app_from_aws,
get_access_token_by_key_app,
)
UNIVERSAL_LABEL = "universal"
@ -162,7 +153,7 @@ if __name__ == "__main__":
if args.private_key:
private_key = args.private_key
elif args.private_key_path:
with open(args.private_key_path, "r") as key_file:
with open(args.private_key_path, "r", encoding="utf-8") as key_file:
private_key = key_file.read()
else:
print("Attempt to get key and id from AWS secret manager")

View File

@ -8,14 +8,13 @@ Lambda function to:
import argparse
import sys
from datetime import datetime
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List
import requests
import boto3 # type: ignore
import requests
from botocore.exceptions import ClientError # type: ignore
from lambda_shared import (
RUNNER_TYPE_LABELS,
RunnerDescription,
@ -23,9 +22,9 @@ from lambda_shared import (
list_runners,
)
from lambda_shared.token import (
get_access_token_by_key_app,
get_cached_access_token,
get_key_and_app_from_aws,
get_access_token_by_key_app,
)
UNIVERSAL_LABEL = "universal"
@ -140,6 +139,7 @@ def delete_runner(access_token: str, runner: RunnerDescription) -> bool:
response = requests.delete(
f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}",
headers=headers,
timeout=30,
)
response.raise_for_status()
print(f"Response code deleting {runner.name} is {response.status_code}")
@ -325,7 +325,7 @@ if __name__ == "__main__":
if args.private_key:
private_key = args.private_key
elif args.private_key_path:
with open(args.private_key_path, "r") as key_file:
with open(args.private_key_path, "r", encoding="utf-8") as key_file:
private_key = key_file.read()
else:
print("Attempt to get key and id from AWS secret manager")

View File

@ -5,8 +5,7 @@ import sys
import boto3 # type: ignore
import requests
from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app
from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token
def get_runner_registration_token(access_token):
@ -17,6 +16,7 @@ def get_runner_registration_token(access_token):
response = requests.post(
"https://api.github.com/orgs/ClickHouse/actions/runners/registration-token",
headers=headers,
timeout=30,
)
response.raise_for_status()
data = response.json()
@ -43,6 +43,7 @@ def main(access_token, push_to_ssm, ssm_parameter_name):
def handler(event, context):
_, _ = event, context
main(get_cached_access_token(), True, "github_runner_registration_token")
@ -85,7 +86,7 @@ if __name__ == "__main__":
if args.private_key:
private_key = args.private_key
else:
with open(args.private_key_path, "r") as key_file:
with open(args.private_key_path, "r", encoding="utf-8") as key_file:
private_key = key_file.read()
token = get_access_token_by_key_app(private_key, args.app_id)

View File

@ -140,7 +140,7 @@ def get_play_url(query):
def run_clickhouse_query(query):
url = "https://play.clickhouse.com/?user=play&query=" + requests.compat.quote(query)
res = requests.get(url)
res = requests.get(url, timeout=30)
if res.status_code != 200:
print("Failed to execute query: ", res.status_code, res.content)
res.raise_for_status()
@ -157,9 +157,9 @@ def split_broken_and_flaky_tests(failed_tests):
flaky_tests = []
for name, report, count_prev_str, count_str in failed_tests:
count_prev, count = int(count_prev_str), int(count_str)
if (2 <= count and count_prev < 2) or (count_prev == 1 and count == 1):
if (count_prev < 2 <= count) or (count_prev == count == 1):
# It failed 2 times or more within the extended time window, so it's definitely broken.
# 2 <= count_prev means that it was not reported as broken on previous runs
# count_prev < 2 means that it was not reported as broken on previous runs
broken_tests.append([name, report])
elif 0 < count and count_prev == 0:
# It failed only once, can be a rare flaky test
@ -170,19 +170,18 @@ def split_broken_and_flaky_tests(failed_tests):
def format_failed_tests_list(failed_tests, failure_type):
if len(failed_tests) == 1:
res = "There is a new {} test:\n".format(failure_type)
res = f"There is a new {failure_type} test:\n"
else:
res = "There are {} new {} tests:\n".format(len(failed_tests), failure_type)
res = f"There are {len(failed_tests)} new {failure_type} tests:\n"
for name, report in failed_tests[:MAX_TESTS_TO_REPORT]:
cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name))
res += "- *{}* - <{}|Report> - <{}|CI DB> \n".format(
name, report, cidb_url
)
res += f"- *{name}* - <{report}|Report> - <{cidb_url}|CI DB> \n"
if MAX_TESTS_TO_REPORT < len(failed_tests):
res += "- and {} other tests... :this-is-fine-fire:".format(
len(failed_tests) - MAX_TESTS_TO_REPORT
res += (
f"- and {len(failed_tests) - MAX_TESTS_TO_REPORT} other "
"tests... :this-is-fine-fire:"
)
return res
@ -221,19 +220,16 @@ def get_too_many_failures_message_impl(failures_count):
if random.random() < REPORT_NO_FAILURES_PROBABILITY:
return None
return "Wow, there are *no failures* at all... 0_o"
if curr_failures < MAX_FAILURES:
return_none = (
curr_failures < MAX_FAILURES
or curr_failures < prev_failures
or (curr_failures - prev_failures) / prev_failures < 0.2
)
if return_none:
return None
if prev_failures < MAX_FAILURES:
return ":alert: *CI is broken: there are {} failures during the last 24 hours*".format(
curr_failures
)
if curr_failures < prev_failures:
return None
if (curr_failures - prev_failures) / prev_failures < 0.2:
return None
return "CI is broken and it's getting worse: there are {} failures during the last 24 hours".format(
curr_failures
)
return f":alert: *CI is broken: there are {curr_failures} failures during the last 24 hours*"
return "CI is broken and it's getting worse: there are {curr_failures} failures during the last 24 hours"
def get_too_many_failures_message(failures_count):
@ -252,7 +248,7 @@ def get_failed_checks_percentage_message(percentage):
return None
msg = ":alert: " if p > 1 else "Only " if p < 0.5 else ""
msg += "*{0:.2f}%* of all checks in master have failed yesterday".format(p)
msg += f"*{p:.2f}%* of all checks in master have failed yesterday"
return msg
@ -278,7 +274,7 @@ def send_to_slack_impl(message):
payload = SLACK_MESSAGE_JSON.copy()
payload["text"] = message
res = requests.post(SLACK_URL, json.dumps(payload))
res = requests.post(SLACK_URL, json.dumps(payload), timeout=30)
if res.status_code != 200:
print("Failed to send a message to Slack: ", res.status_code, res.content)
res.raise_for_status()
@ -297,7 +293,7 @@ def query_and_alert_if_needed(query, get_message_func):
if msg is None:
return
msg += "\nCI DB query: <{}|link>".format(get_play_url(query))
msg += f"\nCI DB query: <{get_play_url(query)}|link>"
print("Sending message to slack:", msg)
send_to_slack(msg)
@ -311,6 +307,7 @@ def check_and_alert():
def handler(event, context):
_, _ = event, context
try:
check_and_alert()
return {"statusCode": 200, "body": "OK"}

View File

@ -2,13 +2,12 @@
import argparse
import json
from datetime import datetime
from queue import Queue
from threading import Thread
import requests
import boto3 # type: ignore
import requests
class Keys(set):
@ -34,7 +33,7 @@ class Worker(Thread):
m = self.queue.get()
if m == "":
break
response = requests.get(f"https://github.com/{m}.keys")
response = requests.get(f"https://github.com/{m}.keys", timeout=30)
self.results.add(f"# {m}\n{response.text}\n")
self.queue.task_done()
@ -45,7 +44,9 @@ def get_org_team_members(token: str, org: str, team_slug: str) -> set:
"Accept": "application/vnd.github.v3+json",
}
response = requests.get(
f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", headers=headers
f"https://api.github.com/orgs/{org}/teams/{team_slug}/members",
headers=headers,
timeout=30,
)
response.raise_for_status()
data = response.json()

View File

@ -8,8 +8,7 @@ from dataclasses import dataclass
from typing import Any, Dict, List
import boto3 # type: ignore
from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid
from lambda_shared import RunnerDescriptions, cached_value_is_valid, list_runners
from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token
@ -134,7 +133,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]:
candidates = instances_by_zone[zone]
total_to_kill += num_to_kill
if num_to_kill > len(candidates):
raise Exception(
raise RuntimeError(
f"Required to kill {num_to_kill}, but have only {len(candidates)}"
f" candidates in AV {zone}"
)
@ -196,6 +195,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]:
def handler(event: dict, context: Any) -> Dict[str, List[str]]:
_ = context
return main(get_cached_access_token(), event)
@ -226,7 +226,7 @@ if __name__ == "__main__":
if args.private_key:
private_key = args.private_key
else:
with open(args.private_key_path, "r") as key_file:
with open(args.private_key_path, "r", encoding="utf-8") as key_file:
private_key = key_file.read()
token = get_access_token_by_key_app(private_key, args.app_id)

View File

@ -90,26 +90,29 @@ def is_trusted_contributor(pr_user_login, pr_user_orgs):
def _exec_get_with_retry(url, token):
headers = {"Authorization": f"token {token}"}
e = Exception()
for i in range(MAX_RETRY):
try:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
return response.json()
except Exception as ex:
print("Got exception executing request", ex)
e = ex
time.sleep(i + 1)
raise Exception("Cannot execute GET request with retries")
raise requests.HTTPError("Cannot execute GET request with retries") from e
def _exec_post_with_retry(url, token, data=None):
headers = {"Authorization": f"token {token}"}
e = Exception()
for i in range(MAX_RETRY):
try:
if data:
response = requests.post(url, headers=headers, json=data)
response = requests.post(url, headers=headers, json=data, timeout=30)
else:
response = requests.post(url, headers=headers)
response = requests.post(url, headers=headers, timeout=30)
if response.status_code == 403:
data = response.json()
if (
@ -123,9 +126,10 @@ def _exec_post_with_retry(url, token, data=None):
return response.json()
except Exception as ex:
print("Got exception executing request", ex)
e = ex
time.sleep(i + 1)
raise Exception("Cannot execute POST request with retry")
raise requests.HTTPError("Cannot execute POST request with retry") from e
def _get_pull_requests_from(repo_url, owner, branch, token):

View File

@ -152,7 +152,15 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
grep -vP $EXCLUDE_DIRS |
xargs xmllint --noout --nonet
pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test $ROOT_PATH/tests/**/*.py
function xargs-pylint {
# $1 is the maximum number of arguments per pylint process
sort | awk '$2=="text/x-script.python" {print $1}' | \
xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/.pylintrc" --persistent=no --score=n
}
find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50
# Beware, the lambdas are checked here. All of them contain `app`, and it causes brain-cucumber-zalgo
find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1
find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f |
grep -vP $EXCLUDE_DIRS |