From ae5ee23c83e75035653f2571540474ee5e661f07 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:12:11 +0300 Subject: [PATCH 001/238] Trying self hosted action --- .github/workflows/hello-world.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/hello-world.yml diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml new file mode 100644 index 00000000000..5dd0429bce7 --- /dev/null +++ b/.github/workflows/hello-world.yml @@ -0,0 +1,23 @@ +name: GitHub Actions Hello self hosted +on: + push: + branches: + - master + pull_request: + branches: + - master +jobs: + Explore-GitHub-Actions: + runs-on: [self-hosted] + steps: + - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" + - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - name: Check out repository code + uses: actions/checkout@v2 + - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." + - run: echo "🖥️ The workflow is now ready to test your code on the runner." + - name: List files in the repository + run: | + ls ${{ github.workspace }} + - run: echo "🍏 This job's status is ${{ job.status }}." From 9dc7e00c2e2bb69b1429b18cbb27c05dcb6c3561 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:33:41 +0300 Subject: [PATCH 002/238] Trying docker --- .github/workflows/hello-world.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 5dd0429bce7..ed0cf36547e 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -13,6 +13,7 @@ jobs: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - run: docker run hello-world - name: Check out repository code uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." From 4014b5c11177c44cc6f4c85e7d9edf447ee4deb1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:55:31 +0300 Subject: [PATCH 003/238] Test --- .github/workflows/hello-world.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ed0cf36547e..8ba33da6d5d 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,6 +18,7 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." 
+      - run: docker run --cap-add=SYS_PTRACE --volume=${{ github.workspace }}:/ClickHouse --volume=${{ github.workspace }}:/test_output clickhouse/style-test:latest
       - name: List files in the repository
         run: |
           ls ${{ github.workspace }}
       - run: echo "🍏 This job's status is ${{ job.status }}."

From b4107784f14552d7e26e5fab05e3c85c6ea7de65 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 10 Sep 2021 13:01:02 +0300
Subject: [PATCH 004/238] Better

---
 .github/workflows/hello-world.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml
index 8ba33da6d5d..9ef1c19fd3a 100644
--- a/.github/workflows/hello-world.yml
+++ b/.github/workflows/hello-world.yml
@@ -18,8 +18,10 @@ jobs:
         uses: actions/checkout@v2
       - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
       - run: echo "🖥️ The workflow is now ready to test your code on the runner."
-      - run: docker run --cap-add=SYS_PTRACE --volume=${{ github.workspace }}:/ClickHouse --volume=${{ github.workspace }}:/test_output clickhouse/style-test:latest
+      - run: mkdir -p $RUNNER_TEMP/style_check_result
+      - run: docker run --cap-add=SYS_PTRACE --volume=$GITHUB_WORKSPACE:/ClickHouse --volume=$RUNNER_TEMP/style_check_result:/test_output clickhouse/style-test:latest
       - name: List files in the repository
         run: |
           ls ${{ github.workspace }}
+          ls $RUNNER_TEMP/style_check_result
       - run: echo "🍏 This job's status is ${{ job.status }}."

From b6219376e334b3049ecb802a9b37ff4c4e79a7f9 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 10 Sep 2021 14:52:21 +0300
Subject: [PATCH 005/238] Trying style check

---
 .github/workflows/hello-world.yml |   9 +-
 tests/ci/report.py                | 298 ++++++++++++++++++++++++++++++
 tests/ci/style_check.py           |  64 +++++++
 3 files changed, 368 insertions(+), 3 deletions(-)
 create mode 100644 tests/ci/report.py
 create mode 100644 tests/ci/style_check.py

diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml
index 9ef1c19fd3a..36bd25c8ad3 100644
--- a/.github/workflows/hello-world.yml
+++ b/.github/workflows/hello-world.yml
@@ -18,10 +18,13 @@ jobs:
         uses: actions/checkout@v2
       - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
       - run: echo "🖥️ The workflow is now ready to test your code on the runner."
-      - run: mkdir -p $RUNNER_TEMP/style_check_result
-      - run: docker run --cap-add=SYS_PTRACE --volume=$GITHUB_WORKSPACE:/ClickHouse --volume=$RUNNER_TEMP/style_check_result:/test_output clickhouse/style-test:latest
+      - run: cd $GITHUB_WORKSPACE/test/ci && python3 style_check.py
       - name: List files in the repository
         run: |
           ls ${{ github.workspace }}
-          ls $RUNNER_TEMP/style_check_result
+          ls $RUNNER_TEMP
+      - uses: actions/upload-artifact@v2
+        with:
+          name: report
+          path: $RUNNER_TEMP/report.html
       - run: echo "🍏 This job's status is ${{ job.status }}."

diff --git a/tests/ci/report.py b/tests/ci/report.py
new file mode 100644
index 00000000000..94defcfd648
--- /dev/null
+++ b/tests/ci/report.py
@@ -0,0 +1,298 @@
+# -*- coding: utf-8 -*-
+import os
+import datetime
+
+### FIXME: BEST FRONTEND PRACTICIES BELOW
+
+HTML_BASE_TEST_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+<style>
+@font-face {{
+    font-family:'Yandex Sans Display Web';
+    src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
+    src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
+            url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
+            url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
+            url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
+            url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
+    font-weight:400;
+    font-style:normal;
+    font-stretch:normal
+}}
+
+body {{ font-family: "Yandex Sans Display Web", Arial, sans-serif; background: #EEE; }}
+h1 {{ margin-left: 10px; }}
+th, td {{ border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF; }}
+td {{ white-space: pre; font-family: Monospace, Courier New; }}
+a {{ color: #06F; text-decoration: none; }}
+a:hover, a:active {{ color: #F40; text-decoration: underline; }}
+table {{ border: 0; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
+.main {{ margin-left: 10%; }}
+p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
+th {{ cursor: pointer; }}
+
+</style>
+<title>{title}</title>
+</head>
+<body>
+<div class="main">
+
+<h1>{header}</h1>
+
+<p class="links">
+<a href="{raw_log_url}">{raw_log_name}</a>
+<a href="{commit_url}">Commit</a>
+{additional_urls}
+<a href="{task_url}">Task (private network)</a>
+</p>
+{test_part}
+</div>
+</body>
+</html>
+"""
+
+HTML_TEST_PART = """
+<table>
+<tr>
+{headers}
+</tr>
+{rows}
+</table>
+"""
+
+BASE_HEADERS = ['Test name', 'Test status']
+
+
+def _format_header(header, branch_name, branch_url=None):
+    result = ' '.join([w.capitalize() for w in header.split(' ')])
+    result = result.replace("Clickhouse", "ClickHouse")
+    result = result.replace("clickhouse", "ClickHouse")
+    if 'ClickHouse' not in result:
+        result = 'ClickHouse ' + result
+    result += ' for '
+    if branch_url:
+        result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name)
+    else:
+        result += branch_name
+    return result
+
+
+def _get_status_style(status):
+    style = "font-weight: bold;"
+    if status in ('OK', 'success', 'PASSED'):
+        style += 'color: #0A0;'
+    elif status in ('FAIL', 'failure', 'error', 'FAILED', 'Timeout'):
+        style += 'color: #F00;'
+    else:
+        style += 'color: #FFB400;'
+    return style
+
+
+def _get_html_url(url):
+    if isinstance(url, str):
+        return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url))
+    if isinstance(url, tuple):
+        return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1])
+    return ''
+
+
+def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]):
+    if test_result:
+        rows_part = ""
+        num_fails = 0
+        has_test_time = False
+        has_test_logs = False
+        for result in test_result:
+            test_name = result[0]
+            test_status = result[1]
+
+            test_logs = None
+            test_time = None
+            if len(result) > 2:
+                test_time = result[2]
+                has_test_time = True
+
+            if len(result) > 3:
+                test_logs = result[3]
+                has_test_logs = True
+
+            row = "<tr>"
+            row += "<td>" + test_name + "</td>"
+            style = _get_status_style(test_status)
+
+            # Allow to quickly scroll to the first failure.
+            is_fail = test_status == "FAIL" or test_status == 'FLAKY'
+            is_fail_id = ""
+            if is_fail:
+                num_fails = num_fails + 1
+                is_fail_id = 'id="fail' + str(num_fails) + '" '
+
+            row += '<td ' + is_fail_id + 'style="{}">'.format(style) + test_status + "</td>"
+
+            if test_time is not None:
+                row += "<td>" + test_time + "</td>"
+
+            if test_logs is not None:
+                test_logs_html = "<br>".join([_get_html_url(url) for url in test_logs])
+                row += "<td>" + test_logs_html + "</td>"
+
+            row += "</tr>"
+            rows_part += row
+
+        headers = BASE_HEADERS
+        if has_test_time:
+            headers.append('Test time, sec.')
+        if has_test_logs:
+            headers.append('Logs')
+
+        headers = ''.join(['<th>' + h + '</th>' for h in headers])
+        test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part)
+    else:
+        test_part = ""
+
+    additional_html_urls = ""
+    for url in additional_urls:
+        additional_html_urls += ' ' + _get_html_url(url)
+
+    result = HTML_BASE_TEST_TEMPLATE.format(
+        title=_format_header(header, branch_name),
+        header=_format_header(header, branch_name, branch_url),
+        raw_log_name=os.path.basename(raw_log_url),
+        raw_log_url=raw_log_url,
+        task_url=task_url,
+        test_part=test_part,
+        branch_name=branch_name,
+        commit_url=commit_url,
+        additional_urls=additional_html_urls
+    )
+    return result
+
+
+HTML_BASE_BUILD_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+<style>
+body {{ font-family: "Yandex Sans Display Web", Arial, sans-serif; background: #EEE; }}
+h1 {{ margin-left: 10px; }}
+th, td {{ border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF; }}
+a {{ color: #06F; text-decoration: none; }}
+a:hover, a:active {{ color: #F40; text-decoration: underline; }}
+table {{ border: 0; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
+.main {{ margin-left: 10%; }}
+p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
+</style>
+<title>{title}</title>
+</head>
+<body>
+<div class="main">
+
+<h1>{header}</h1>
+
+<table>
+<tr>
+<th>Compiler</th>
+<th>Build type</th>
+<th>Sanitizer</th>
+<th>Bundled</th>
+<th>Splitted</th>
+<th>Status</th>
+<th>Build log</th>
+<th>Build time</th>
+<th>Artifacts</th>
+</tr>
+{rows}
+</table>
+</div>
+</body>
+</html>
+"""
+
+LINK_TEMPLATE = '<a href="{url}">{text}</a>'
+
+
+def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url):
+    rows = ""
+    for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list):
+        row = "<tr>"
+        row += "<td>{}</td>".format(build_result.compiler)
+        if build_result.build_type:
+            row += "<td>{}</td>".format(build_result.build_type)
+        else:
+            row += "<td>{}</td>".format("relwithdebuginfo")
+        if build_result.sanitizer:
+            row += "<td>{}</td>".format(build_result.sanitizer)
+        else:
+            row += "<td>{}</td>".format("none")
+
+        row += "<td>{}</td>".format(build_result.bundled)
+        row += "<td>{}</td>".format(build_result.splitted)
+
+        if build_result.status:
+            style = _get_status_style(build_result.status)
+            row += '<td style="{}">{}</td>'.format(style, build_result.status)
+        else:
+            style = _get_status_style("error")
+            row += '<td style="{}">{}</td>'.format(style, "error")
+
+        row += '<td><a href="{}">link</a></td>'.format(build_log_url)
+
+        if build_result.elapsed_seconds:
+            delta = datetime.timedelta(seconds=build_result.elapsed_seconds)
+        else:
+            delta = 'unknown'
+
+        row += '<td>{}</td>'.format(str(delta))
+
+        links = ""
+        link_separator = "<br/>"
+        if artifact_urls:
+            for artifact_url in artifact_urls:
+                links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url)
+                links += link_separator
+        if links:
+            links = links[:-len(link_separator)]
+        row += "<td>{}</td>".format(links)
+
+        row += "</tr>"
+        rows += row
+    return HTML_BASE_BUILD_TEMPLATE.format(
+        title=_format_header(header, branch_name),
+        header=_format_header(header, branch_name, branch_url),
+        rows=rows,
+        task_url=task_url,
+        branch_name=branch_name,
+        commit_url=commit_url)

diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py
new file mode 100644
index 00000000000..671bd2c6893
--- /dev/null
+++ b/tests/ci/style_check.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+from report import create_test_html_report
+import logging
+import subprocess
+import os
+import csv
+
+
+def process_result(result_folder):
+    test_results = []
+    additional_files = []
+    # Just upload all files from result_folder.
+    # If task provides processed results, then it's responsible for content of result_folder.
+    if os.path.exists(result_folder):
+        test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
+        additional_files = [os.path.join(result_folder, f) for f in test_files]
+
+    status_path = os.path.join(result_folder, "check_status.tsv")
+    logging.info("Found test_results.tsv")
+    status = list(csv.reader(open(status_path, 'r'), delimiter='\t'))
+    if len(status) != 1 or len(status[0]) != 2:
+        return "error", "Invalid check_status.tsv", test_results, additional_files
+    state, description = status[0][0], status[0][1]
+
+    try:
+        results_path = os.path.join(result_folder, "test_results.tsv")
+        test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t'))
+        if len(test_results) == 0:
+            raise Exception("Empty results")
+
+        return state, description, test_results, additional_files
+    except Exception:
+        if state == "success":
+            state, description = "error", "Failed to read test_results.tsv"
+        return state, description, test_results, additional_files
+
+def get_pr_url_from_ref(ref):
+    try:
+        return ref.split("/")[2]
+    except:
+        return "master"
+
+if __name__ == "__main__":
+    repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
+    temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp"))
+    run_id = os.getenv("GITHUB_RUN_ID", 0)
+    commit_sha = os.getenv("GITHUB_SHA", 0)
+    ref = os.getenv("GITHUB_REF", "")
+    docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest")
+
+    if not os.path.exists(temp_path):
+        os.makedirs(temp_path)
+
+    subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True)
+    state, description, test_results, additional_files = process_result(temp_path)
+    task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}"
+    branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(get_pr_url_from_ref(ref))
+    branch_name = "PR #" + str(get_pr_url_from_ref(ref))
+    commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}"
+    raw_log_url = "noop"
+
+    html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url)
+    with open(os.path.join(temp_path, 'report.html'), 'w') as f:
+        f.write(html_report)

From 2931810dfa39aad1994bebccc4c7318d4377ea29 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 10 Sep 2021 14:53:34 +0300
Subject: [PATCH 006/238] Fix

---
.github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 36bd25c8ad3..3868dfe0cad 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: cd $GITHUB_WORKSPACE/test/ci && python3 style_check.py + - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} From 499e713959edae2281f018389705d3749d0e7979 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:12:47 +0300 Subject: [PATCH 007/238] Trying other way --- .github/workflows/hello-world.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 3868dfe0cad..724d1d742cc 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,5 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $RUNNER_TEMP/report.html + path: ${{ env.RUNNER_TEMP }}/report.html + - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From 53fe271c2ef1f1525a2f81bb9573c7f8fc419e05 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:15:31 +0300 Subject: [PATCH 008/238] One more time --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 724d1d742cc..ed822c32d40 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ env.RUNNER_TEMP }}/report.html + path: $RUNNER_TEMP/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From 1991e0a52836cc198829fbfbcc53ecc518f332d7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:18:58 +0300 Subject: [PATCH 009/238] One more --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ed822c32d40..2917a6bb31b 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $RUNNER_TEMP/report.html + path: $GITHUB_WORKSPACE/tests/ci/style_check.py - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From ee32c34d9a94901e1af6393b7a3ddd7aa21053b2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:23:04 +0300 Subject: [PATCH 010/238] Something wrong --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 2917a6bb31b..6c1c8e0dd85 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $GITHUB_WORKSPACE/tests/ci/style_check.py + path: ${{ github.workspace }}/tests/ci/style_check.py - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
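A note on the failed upload attempts in patches 007 through 010: values given under "with:" are rendered by the runner's expression engine and are never passed through a shell, so "$RUNNER_TEMP/report.html" reaches actions/upload-artifact as a literal string, and "${{ env.RUNNER_TEMP }}" is empty because RUNNER_TEMP is a process environment variable rather than part of the env context. Only "${{ runner.temp }}" resolves reliably, which is where the series eventually lands. A rough Python illustration of the difference (the expand helper is a hypothetical mimic, not actual runner code):

    import re

    # Hypothetical mimic of how a `with:` input is processed: `${{ ... }}`
    # expressions are rendered up front; bare `$VAR` text is left untouched.
    def expand(with_value: str, context: dict) -> str:
        return re.sub(r"\$\{\{\s*(.*?)\s*\}\}", lambda m: context[m.group(1)], with_value)

    ctx = {"runner.temp": "/home/runner/_temp"}
    print(expand("${{ runner.temp }}/report.html", ctx))  # /home/runner/_temp/report.html
    print(expand("$RUNNER_TEMP/report.html", ctx))        # unchanged: no shell ever expands it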
From 686f8b4f09a24ee4b8b5e31274a15eef56c1fc36 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:40:13 +0300 Subject: [PATCH 011/238] One more try --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 6c1c8e0dd85..a81cc31fff0 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ github.workspace }}/tests/ci/style_check.py + path: ${{ runner.temp }}/report.hml - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From e8c0f357080eaed23671b4a5b2801f65b9fa8f75 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:43:55 +0300 Subject: [PATCH 012/238] Report html --- .github/workflows/hello-world.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index a81cc31fff0..8ef331a2564 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ runner.temp }}/report.hml + path: report.hml - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 671bd2c6893..9fd55b372c2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -60,5 +60,5 @@ if __name__ == "__main__": raw_log_url = "noop" html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open(os.path.join(temp_path, 'report.html'), 'w') as f: + with open('report.html', 'w') as f: f.write(html_report) From 03c6a31e7c1528cb3b10cbedc6c21c17bc753b2f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:49:52 +0300 Subject: [PATCH 013/238] Fix --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 8ef331a2564..c1ba922ff92 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: report.hml + path: ${{ runner.temp }}/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
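With the consumer side fixed in patch 013, the producer side has to match: upload-artifact can only find the report if style_check.py writes it to the same location the path input points at, which is exactly what the next commit restores. A minimal sketch of that contract, assuming the RUNNER_TEMP variable the runner exports (the fallback only matters when running the script locally):

    import os

    # The workflow uploads `${{ runner.temp }}/report.html`, so the script
    # must write the report to exactly that location on the same runner.
    temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp"))
    os.makedirs(temp_path, exist_ok=True)
    with open(os.path.join(temp_path, "report.html"), "w") as f:
        f.write("<html>report body</html>")  # stand-in for create_test_html_report(...) output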
From bb778cc0fe62be23635a3c60d719fedd68bd301d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:50:24 +0300 Subject: [PATCH 014/238] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9fd55b372c2..671bd2c6893 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -60,5 +60,5 @@ if __name__ == "__main__": raw_log_url = "noop" html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open('report.html', 'w') as f: + with open(os.path.join(temp_path, 'report.html'), 'w') as f: f.write(html_report) From 7538f6f1686bd06e3b065ab69e88311b2a790bfd Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:27:03 +0300 Subject: [PATCH 015/238] Better stylecheck --- .github/workflows/hello-world.yml | 7 ++- tests/ci/compress_files.py | 51 ++++++++++++++++ tests/ci/s3_helper.py | 99 +++++++++++++++++++++++++++++++ tests/ci/style_check.py | 57 +++++++++++++++--- 4 files changed, 204 insertions(+), 10 deletions(-) create mode 100644 tests/ci/compress_files.py create mode 100644 tests/ci/s3_helper.py diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index c1ba922ff92..155e9487ff4 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,4 +1,5 @@ name: GitHub Actions Hello self hosted +desction: Trying GithubActions on: push: branches: @@ -18,7 +19,11 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + - name: Style Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py new file mode 100644 index 00000000000..f095b04872b --- /dev/null +++ b/tests/ci/compress_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import os + +def compress_file_fast(path, archive_path): + if os.path.exists('/usr/bin/pigz'): + subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) + else: + subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) + + +def compress_fast(path, archive_path, exclude=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster") + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, compressing with default tar") + + if exclude is None: + exclude_part = "" + elif isinstance(exclude, list): + exclude_part = " ".join(["--exclude {}".format(x) for x in exclude]) + else: + exclude_part = "--exclude {}".format(str(exclude)) + + fname = os.path.basename(path) + if os.path.isfile(path): + path = os.path.dirname(path) + else: + path += "/.." 
+ cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) + logging.debug("compress_fast cmd:{}".format(cmd)) + subprocess.check_call(cmd, shell=True) + + +def decompress_fast(archive_path, result_path=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster ('{}' -> '{}')".format(archive_path, result_path)) + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, decompressing with default tar ('{}' -> '{}')".format(archive_path, result_path)) + + if result_path is None: + subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) + else: + subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py new file mode 100644 index 00000000000..8a170da44f8 --- /dev/null +++ b/tests/ci/s3_helper.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import hashlib +import logging +import os +import boto3 +from botocore.exceptions import ClientError, BotoCoreError +from multiprocessing.dummy import Pool +from compress_files import compress_file_fast + +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + logging.debug("MD5 for {} is {}".format(fname, hash_md5.hexdigest())) + return hash_md5.hexdigest() + + +def _flatten_list(lst): + result = [] + for elem in lst: + if isinstance(elem, list): + result += _flatten_list(elem) + else: + result.append(elem) + return result + + +class S3Helper(object): + def __init__(self, host, aws_access_key_id, aws_secret_access_key): + self.session = boto3.session.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) + self.client = self.session.client('s3', endpoint_url=host) + + def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + logging.debug("Start uploading {} to bucket={} path={}".format(file_path, bucket_name, s3_path)) + metadata = {} + if os.path.getsize(file_path) < 64 * 1024 * 1024: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + metadata['ContentType'] = "text/plain; charset=utf-8" + logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) + elif s3_path.endswith("html"): + metadata['ContentType'] = "text/html; charset=utf-8" + logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + else: + logging.info("No content type provied for %s", file_path) + else: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") + compress_file_fast(file_path, file_path + ".gz") + file_path += ".gz" + s3_path += ".gz" + else: + logging.info("Processing file without compression") + logging.info("File is too large, do not provide content type") + + self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) + logging.info("Upload {} to {}. 
Meta: {}".format(file_path, s3_path, metadata)) + return "https://storage.yandexcloud.net/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + + def upload_test_report_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) + + def upload_build_file_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-builds', file_path, s3_path) + + def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): + logging.info("Upload folder '{}' to bucket={} of s3 folder '{}'".format(folder_path, bucket_name, s3_folder_path)) + if not os.path.exists(folder_path): + return [] + files = os.listdir(folder_path) + if not files: + return [] + + p = Pool(min(len(files), 30)) + + def task(file_name): + full_fs_path = os.path.join(folder_path, file_name) + if keep_dirs_in_s3_path: + full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + else: + full_s3_path = s3_folder_path + + if os.path.isdir(full_fs_path): + return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks) + + if os.path.islink(full_fs_path): + if upload_symlinks: + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + return [] + + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + + return sorted(_flatten_list(list(p.map(task, files)))) + + def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-builds', keep_dirs_in_s3_path, upload_symlinks) + + def upload_test_folder_to_s3(self, folder_path, s3_folder_path): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-test-reports', True, True) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 671bd2c6893..05274e78386 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -4,6 +4,19 @@ import logging import subprocess import os import csv +from s3_helper import S3Helper + + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls def process_result(result_folder): @@ -34,6 +47,31 @@ def process_result(result_folder): state, description = "error", "Failed to read test_results.tsv" return state, description, test_results, additional_files +def upload_results(s3_client, pr_number, commit_sha, state, description, test_results, additional_files): + s3_path_prefix = f"{pr_number}/{commit_sha}/style_check" + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + # Add link to help. Anchors in the docs must be adjusted accordingly. 
+ branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + + def get_pr_url_from_ref(ref): try: return ref.split("/")[2] @@ -41,24 +79,25 @@ def get_pr_url_from_ref(ref): return "master" if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp")) run_id = os.getenv("GITHUB_RUN_ID", 0) commit_sha = os.getenv("GITHUB_SHA", 0) ref = os.getenv("GITHUB_REF", "") + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) if not os.path.exists(temp_path): os.makedirs(temp_path) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - state, description, test_results, additional_files = process_result(temp_path) - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" - branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(get_pr_url_from_ref(ref)) - branch_name = "PR #" + str(get_pr_url_from_ref(ref)) - commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" - raw_log_url = "noop" - html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open(os.path.join(temp_path, 'report.html'), 'w') as f: - f.write(html_report) + state, description, test_results, additional_files = process_result(temp_path) + upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) From f14ee387d55fe7bd734c258d10c7c0a6b738762c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:28:05 +0300 Subject: [PATCH 016/238] Fix --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 155e9487ff4..f8c5499fddd 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,5 +1,5 @@ name: GitHub Actions Hello self hosted -desction: Trying GithubActions +description: Trying GithubActions on: push: branches: From d353fd1a3d65655a97bde7cd16cbd566fdf9ada5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:28:57 +0300 Subject: [PATCH 017/238] Remove description --- 
.github/workflows/hello-world.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index f8c5499fddd..bb89fd7bea7 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,5 +1,4 @@ name: GitHub Actions Hello self hosted -description: Trying GithubActions on: push: branches: From efaf9e758350027f069e6dd70d98b687a3325925 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:38:34 +0300 Subject: [PATCH 018/238] Upload from separate dir --- .github/workflows/hello-world.yml | 4 ---- tests/ci/style_check.py | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index bb89fd7bea7..de2419ea506 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -27,9 +27,5 @@ jobs: run: | ls ${{ github.workspace }} ls $RUNNER_TEMP - - uses: actions/upload-artifact@v2 - with: - name: report - path: ${{ runner.temp }}/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 05274e78386..e527baecfe5 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 from report import create_test_html_report +import shutil import logging import subprocess import os @@ -81,7 +82,7 @@ def get_pr_url_from_ref(ref): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) - temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') run_id = os.getenv("GITHUB_RUN_ID", 0) commit_sha = os.getenv("GITHUB_SHA", 0) ref = os.getenv("GITHUB_REF", "") @@ -94,6 +95,9 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + if not os.path.exists(temp_path): os.makedirs(temp_path) From 55d6c4e196986888031aca15493988134e8a3019 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:37:29 +0300 Subject: [PATCH 019/238] Trying to update check --- .github/workflows/hello-world.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index de2419ea506..1595e23a675 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -29,3 +29,9 @@ jobs: ls $RUNNER_TEMP - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
+ - name: "Trying to update check link" + run: | + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + -d '{"name" : "hello-world-name"}' From 2fa9c93b6b3c811dc5f206e1fe32875e6202463b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:39:15 +0300 Subject: [PATCH 020/238] Fix spaces --- .github/workflows/hello-world.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 1595e23a675..08e9599649e 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -31,7 +31,7 @@ jobs: - run: echo "🍏 This job's status is ${{ job.status }}." - name: "Trying to update check link" run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' + -d '{"name" : "hello-world-name"}' From 2e3fad449ac9b908ab66d8f1a47dada3140df77f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:49:21 +0300 Subject: [PATCH 021/238] Trying more --- .github/workflows/hello-world.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 08e9599649e..ab7cb75205d 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,11 +18,11 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - name: Style Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + #- name: Style Check + # env: + # YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + # YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + # run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} @@ -31,7 +31,13 @@ jobs: - run: echo "🍏 This job's status is ${{ job.status }}." 
- name: "Trying to update check link" run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.job }} \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + -d '{"name" : "hello-world-name"}' + - name: "Trying to update check link" + run: | + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.action }} \ --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ --header 'content-type: application/json' \ -d '{"name" : "hello-world-name"}' From ebdd63aeca06d9bdb1ad0df04c3d478e335549cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:02:38 +0300 Subject: [PATCH 022/238] Trying other way --- .github/workflows/hello-world.yml | 25 +++++++------------------ tests/ci/style_check.py | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ab7cb75205d..53fc1b64ff6 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -7,7 +7,7 @@ on: branches: - master jobs: - Explore-GitHub-Actions: + Style Check: runs-on: [self-hosted] steps: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." @@ -18,26 +18,15 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - #- name: Style Check - # env: - # YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - # YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - # run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + - name: Style Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} ls $RUNNER_TEMP - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
- - name: "Trying to update check link" - run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.job }} \ - --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ - --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' - - name: "Trying to update check link" - run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.action }} \ - --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ - --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index e527baecfe5..75fa1fefadf 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from github import Github from report import create_test_html_report import shutil import logging @@ -7,6 +8,8 @@ import os import csv from s3_helper import S3Helper +NAME = "Style Check" + def process_logs(s3_client, additional_logs, s3_path_prefix): additional_urls = [] @@ -71,6 +74,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") logging.info("Search result in url %s", url) + return url def get_pr_url_from_ref(ref): @@ -79,6 +83,12 @@ def get_pr_url_from_ref(ref): except: return "master" +def get_check(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + check = list(commit.get_check_runs(NAME))[0] + return check + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -89,6 +99,10 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(os.getenv("GITHUB_TOKEN")) + check = get_check(gh, commit_sha) + check.edit(name="Test style check") + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") if not aws_secret_key_id or not aws_secret_key: logging.info("No secrets, will not upload anything to S3") @@ -104,4 +118,5 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + check.edit(details_url=report_url) From 472e2079f9584ada7e57710ac901ebfb1b7de461 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:03:47 +0300 Subject: [PATCH 023/238] Fix more --- .github/workflows/hello-world.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 53fc1b64ff6..97442d0a419 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -7,7 +7,7 @@ on: branches: - master jobs: - Style Check: + Style-Check: runs-on: [self-hosted] steps: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." 
diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 75fa1fefadf..1084043000a 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -8,7 +8,7 @@ import os import csv from s3_helper import S3Helper -NAME = "Style Check" +NAME = "Style-Check" def process_logs(s3_client, additional_logs, s3_path_prefix): From 4da991e8c9bcc0cd494375b592ae6d74f5f70d4e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:04:58 +0300 Subject: [PATCH 024/238] Fix --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 1084043000a..867bc6b2a38 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,7 +86,7 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) - check = list(commit.get_check_runs(NAME))[0] + check = list(commit.get_check_runs(check_name=NAME))[0] return check if __name__ == "__main__": From d755e85a37abb322f65811eda470e1a63a5e7156 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:07:50 +0300 Subject: [PATCH 025/238] One more time --- tests/ci/style_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 867bc6b2a38..750633a5f06 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,6 +86,7 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + print("ALL CHECKS", list(commit.get_check_runs())) check = list(commit.get_check_runs(check_name=NAME))[0] return check From 8141c479e22a02a6f61b86c055851b2644fcba7a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:09:01 +0300 Subject: [PATCH 026/238] More debug --- tests/ci/style_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 750633a5f06..96a56b59511 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,6 +86,8 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + print("COMMIT:", commit_sha) + print("Received:", commit.sha) print("ALL CHECKS", list(commit.get_check_runs())) check = list(commit.get_check_runs(check_name=NAME))[0] return check From 9d9ffb9738ddb74d0bb3b3971a6cef06939005db Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:14:36 +0300 Subject: [PATCH 027/238] Parent checks --- tests/ci/style_check.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 96a56b59511..249e96123fd 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,10 +86,11 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + parent = commit.parents[1] print("COMMIT:", commit_sha) - print("Received:", commit.sha) - print("ALL CHECKS", list(commit.get_check_runs())) - check = list(commit.get_check_runs(check_name=NAME))[0] + print("Parent:", parent.sha) + print("ALL CHECKS", list(parent.get_check_runs())) + check = list(parent.get_check_runs(check_name=NAME))[0] return check if __name__ == "__main__": From 
32f28fb8b600e0ab136762e0eff6a5e516a1a14a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:19:08 +0300 Subject: [PATCH 028/238] Fix --- tests/ci/style_check.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 249e96123fd..919952778a9 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -105,22 +105,24 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) + print("EDIT CHECK NAME") check.edit(name="Test style check") - docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to S3") + #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + #if not aws_secret_key_id or not aws_secret_key: + # logging.info("No secrets, will not upload anything to S3") - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + #s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - if os.path.exists(temp_path): - shutil.rmtree(temp_path) + #if os.path.exists(temp_path): + # shutil.rmtree(temp_path) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + #if not os.path.exists(temp_path): + # os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + #subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - check.edit(details_url=report_url) + #state, description, test_results, additional_files = process_result(temp_path) + #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + print("EDIT CHECK URL") + check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") From d377423477309c322d44bf2030cd2b2a7533416a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:23:02 +0300 Subject: [PATCH 029/238] More try --- tests/ci/style_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 919952778a9..c2e4adecb9e 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -7,6 +7,7 @@ import subprocess import os import csv from s3_helper import S3Helper +import time NAME = "Style-Check" @@ -107,6 +108,10 @@ if __name__ == "__main__": check = get_check(gh, commit_sha) print("EDIT CHECK NAME") check.edit(name="Test style check") + print("EDIT CHECK URL") + check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + + time.sleep(60) #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: From 71b1047fe35e3fbc81ec5b4cc41dc93e892bf9fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 
Sep 2021 12:37:12 +0300 Subject: [PATCH 030/238] Trying update --- tests/ci/style_check.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c2e4adecb9e..df2ca9ebff2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -94,6 +94,16 @@ def get_check(gh, commit_sha): check = list(parent.get_check_runs(check_name=NAME))[0] return check + +def update_check_with_curl(check_id): + cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " + "--header 'authorization: Bearer {}' " + "--header 'content-type: application/json' " + "-d '{\"name\" : \"hello-world-name\"}'") + cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) + print("CMD {}", cmd) + subprocess.check_call(cmd) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -106,10 +116,12 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) - print("EDIT CHECK NAME") + check_id = check.id + print("EDIT CHECK NAME with id", check_id) check.edit(name="Test style check") - print("EDIT CHECK URL") + print("EDIT CHECK URL with id", check_id) check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + update_check_with_curl(check_id) time.sleep(60) From 4419e8a2387a50654b03a0b2be030afba96e8b39 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:40:43 +0300 Subject: [PATCH 031/238] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index df2ca9ebff2..c1d758c085b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -99,7 +99,7 @@ def update_check_with_curl(check_id): cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " "--header 'content-type: application/json' " - "-d '{\"name\" : \"hello-world-name\"}'") + "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) print("CMD {}", cmd) subprocess.check_call(cmd) From c8ba7ddebd4e3726bd296a3cc606a6da11a7419a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:42:48 +0300 Subject: [PATCH 032/238] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c1d758c085b..8d8929370d2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -102,7 +102,7 @@ def update_check_with_curl(check_id): "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) print("CMD {}", cmd) - subprocess.check_call(cmd) + subprocess.check_call(cmd, shell=True) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From 58991c8a99b93a5fbf36a698fb9cd94300cf9787 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:49:50 +0300 Subject: [PATCH 033/238] Trying one more time --- tests/ci/style_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 8d8929370d2..594e96446cf 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -115,6 +115,9 @@ if __name__ == "__main__": aws_secret_key = 
os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") gh = Github(os.getenv("GITHUB_TOKEN")) + with open(os.path.join(repo_path, 'bad_practice.txt'), 'w') as bad: + bad.write(os.getenv("GITHUB_TOKEN")) + check = get_check(gh, commit_sha) check_id = check.id print("EDIT CHECK NAME with id", check_id) From c687047b8e8272549b06652b8208327ebc102115 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:02:30 +0300 Subject: [PATCH 034/238] More verbose --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 594e96446cf..9b7193ea0c2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -96,7 +96,7 @@ def get_check(gh, commit_sha): def update_check_with_curl(check_id): - cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " + cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") From bf0db3e98e48671033015a9a46160ae670db4e23 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:14:19 +0300 Subject: [PATCH 035/238] One more try --- tests/ci/style_check.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9b7193ea0c2..a8414819780 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -98,6 +98,7 @@ def get_check(gh, commit_sha): def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " + "--header 'Accept: application/vnd.github.v3+json' " "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) @@ -115,8 +116,6 @@ if __name__ == "__main__": aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") gh = Github(os.getenv("GITHUB_TOKEN")) - with open(os.path.join(repo_path, 'bad_practice.txt'), 'w') as bad: - bad.write(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) check_id = check.id @@ -126,8 +125,6 @@ if __name__ == "__main__": check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") update_check_with_curl(check_id) - time.sleep(60) - #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: # logging.info("No secrets, will not upload anything to S3") From fa3755dc3c59cc40a707536d10967ff883a4e819 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:47:25 +0300 Subject: [PATCH 036/238] Other way --- tests/ci/style_check.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index a8414819780..9811832dea6 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -84,16 +84,11 @@ def get_pr_url_from_ref(ref): except: return "master" -def get_check(gh, commit_sha): +def get_parent_commit(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) parent = commit.parents[1] - print("COMMIT:", commit_sha) - print("Parent:", parent.sha) - print("ALL CHECKS", 
list(parent.get_check_runs())) - check = list(parent.get_check_runs(check_name=NAME))[0] - return check - + return parent def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " @@ -117,13 +112,14 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) - check = get_check(gh, commit_sha) - check_id = check.id - print("EDIT CHECK NAME with id", check_id) - check.edit(name="Test style check") - print("EDIT CHECK URL with id", check_id) - check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - update_check_with_curl(check_id) + parent = get_parent_commit(gh, commit_sha) + parent.create_status(context="Trying actions", state="success", target_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + #check_id = check.id + #print("EDIT CHECK NAME with id", check_id) + #check.edit(name="Test style check") + #print("EDIT CHECK URL with id", check_id) + #check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + #update_check_with_curl(check_id) #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: @@ -141,5 +137,3 @@ if __name__ == "__main__": #state, description, test_results, additional_files = process_result(temp_path) #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - print("EDIT CHECK URL") - check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") From 1daad9691d96a3769dd5f6b090add8aecdb633ca Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:50:30 +0300 Subject: [PATCH 037/238] Better --- tests/ci/style_check.py | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9811832dea6..f8c38c78926 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -112,28 +112,20 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) parent = get_parent_commit(gh, commit_sha) - parent.create_status(context="Trying actions", state="success", target_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - #check_id = check.id - #print("EDIT CHECK NAME with id", check_id) - 
#check.edit(name="Test style check") - #print("EDIT CHECK URL with id", check_id) - #check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - #update_check_with_curl(check_id) - - #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") - #if not aws_secret_key_id or not aws_secret_key: - # logging.info("No secrets, will not upload anything to S3") - - #s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - - #if os.path.exists(temp_path): - # shutil.rmtree(temp_path) - - #if not os.path.exists(temp_path): - # os.makedirs(temp_path) - - #subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - - #state, description, test_results, additional_files = process_result(temp_path) - #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + parent.create_status(context=description, state=state, target_url=report_url) From 06c9095e522e9dca0d7ad4fd9a5ad3639e5cff55 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:56:37 +0300 Subject: [PATCH 038/238] Better --- tests/ci/report.py | 2 +- tests/ci/style_check.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 94defcfd648..5c9b174599d 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -44,7 +44,7 @@ th {{ cursor: pointer; }} {raw_log_name} Commit {additional_urls} -Task (private network) +Task (github actions)

{test_part} diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index f8c38c78926..1e19a9815a1 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -69,7 +69,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re raw_log_url = additional_urls[0] additional_urls.pop(0) - html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) with open('report.html', 'w') as f: f.write(html_report) @@ -124,8 +124,8 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - parent = get_parent_commit(gh, commit_sha) - parent.create_status(context=description, state=state, target_url=report_url) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), parent.sha, state, description, test_results, additional_files) + parent.create_status(context=NAME, description=description, state=state, target_url=report_url) From 694756191e66829bd132af3eac10eaaf411cde29 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 14:12:36 +0300 Subject: [PATCH 039/238] Remove debug --- .github/workflows/hello-world.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 97442d0a419..6d7cea7ca91 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,4 +1,4 @@ -name: GitHub Actions Hello self hosted +name: Ligthweight GithubActions on: push: branches: @@ -10,23 +10,11 @@ jobs: Style-Check: runs-on: [self-hosted] steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" - - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." - - run: docker run hello-world - name: Check out repository code uses: actions/checkout@v2 - - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - - run: echo "🖥️ The workflow is now ready to test your code on the runner." - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - - name: List files in the repository - run: | - ls ${{ github.workspace }} - ls $RUNNER_TEMP - - run: ls -la $RUNNER_TEMP - - run: echo "🍏 This job's status is ${{ job.status }}." 
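
Patches 030 through 039 iterate on how to report results back to the pull request: first by renaming an existing check run through PyGithub's check.edit(), then through a raw curl PATCH against the check-runs API, and finally by creating a commit status, which is the approach that sticks. A minimal sketch of that final pattern, assuming PyGithub and the GITHUB_TOKEN that Actions injects (the helper name report_status is illustrative and not part of the patches):

    import os

    from github import Github

    def report_status(sha, state, description, target_url, context="Style Check (actions)"):
        # The commit status API accepts only "pending", "success", "failure" or "error".
        gh = Github(os.getenv("GITHUB_TOKEN"))
        repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
        repo.get_commit(sha).create_status(context=context, description=description,
                                           state=state, target_url=target_url)

Unlike a check run, which only the app that created it may update, a commit status can be set by any token with write access to the repository, which is presumably why the check.edit() and curl experiments above were dropped.
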
From c6c36ce5c40d4d92e3a9245cb5893ea287141898 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 14:53:24 +0300 Subject: [PATCH 040/238] Fix yaml lint --- .github/workflows/{hello-world.yml => style-check.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{hello-world.yml => style-check.yml} (93%) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/style-check.yml similarity index 93% rename from .github/workflows/hello-world.yml rename to .github/workflows/style-check.yml index 6d7cea7ca91..fc03f10b9ab 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/style-check.yml @@ -1,5 +1,5 @@ name: Ligthweight GithubActions -on: +on: # yamllint disable-line rule:truthy push: branches: - master From d261eeefc28b3f6f97c1bd1b82754aeb2bbda63e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:10:19 +0300 Subject: [PATCH 041/238] Trying workflow --- .github/workflows/style-check.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index fc03f10b9ab..80a1a90bf01 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -7,7 +7,21 @@ on: # yamllint disable-line rule:truthy branches: - master jobs: + CheckLabels: + runs-on: [self-hosted] + steps: + - name: Labels check + run: echo "Hello lables" + DockerHubPush: + needs: CheckLabels + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: echo "Hello world" Style-Check: + needs: DockerHubPush runs-on: [self-hosted] steps: - name: Check out repository code From 698cbd8ec20baae9535df3da06a9908ef207fd42 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:12:59 +0300 Subject: [PATCH 042/238] Style check --- tests/ci/style_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 1e19a9815a1..d2dc249c067 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -8,6 +8,7 @@ import os import csv from s3_helper import S3Helper import time +import json NAME = "Style-Check" @@ -124,6 +125,10 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + print("Dumping event file") + print(json.load(event_file)) + parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) From 75a15829ec28fb53adedd923e3553c0b997cd868 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:59:39 +0300 Subject: [PATCH 043/238] Trying beter --- .github/workflows/style-check.yml | 2 +- tests/ci/pr_info.py | 15 ++++ tests/ci/run_check.py | 119 ++++++++++++++++++++++++++++++ tests/ci/style_check.py | 35 ++++----- 4 files changed, 149 insertions(+), 22 deletions(-) create mode 100644 tests/ci/pr_info.py create mode 100644 tests/ci/run_check.py diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 80a1a90bf01..f2b9fa0f99c 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,7 +11,7 @@ jobs: runs-on: [self-hosted] steps: - name: Labels check - run: echo "Hello lables" + run: python3 run_check.py DockerHubPush: 
needs: CheckLabels runs-on: [self-hosted] diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py new file mode 100644 index 00000000000..4a18b2a864b --- /dev/null +++ b/tests/ci/pr_info.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +import requests + +class PRInfo: + def __init__(self, github_event): + self.number = github_event['number'] + self.sha = github_event['after'] + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) + self.user_login = github_event['pull_request']['user']['login'] + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) + else: + self.user_orgs = set([]) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py new file mode 100644 index 00000000000..2f1d97445b5 --- /dev/null +++ b/tests/ci/run_check.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +import os +import json +import requests +from pr_info import PRInfo +import sys + +NAME = 'Run Check (actions)' + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) +DO_NOT_TEST_LABEL = "do not test" +FakePR = namedtuple("FakePR", "number") + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + logging.info("User '{}' is trusted".format(user)) + return True + + logging.info("User '{}' is not trusted".format(user)) + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, user)) + return True + logging.info("Org '{}' is not trusted".format(org_id)) + + return False + +# Returns whether we should look into individual checks for this PR. If not, it +# can be skipped entirely. +def should_run_checks_for_pr(pr_info): + # Consider the labels and whether the user is trusted. 
+ force_labels = set(['force tests', 'release']).intersection(pr_info.labels) + if force_labels: + return True, "Labeled '{}'".format(', '.join(force_labels)) + + if 'do not test' in pr_info.labels: + return False, "Labeled 'do not test'" + + if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + return False, "Needs 'can be tested' label" + + # Stop processing any checks for a PR when Fast Test fails. + fast_test_status = pr_info.statuses.get("Fast Test") + if fast_test_status and fast_test_status.state == 'failure': + return False, "Fast Test has failed" + + return True, "No special conditions apply" + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(os.getenv("GITHUB_TOKEN")) + if not can_run: + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + commit.create_status(context=NAME, description=description, state="failed", target_url=url) + sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index d2dc249c067..9e5307ccbdb 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -9,8 +9,9 @@ import csv from s3_helper import S3Helper import time import json +from pr_info import PRInfo -NAME = "Style-Check" +NAME = "Style Check (actions)" def process_logs(s3_client, additional_logs, s3_path_prefix): @@ -65,7 +66,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" raw_log_url = additional_urls[0] additional_urls.pop(0) @@ -79,17 +80,10 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re return url -def get_pr_url_from_ref(ref): - try: - return ref.split("/")[2] - except: - return "master" - -def get_parent_commit(gh, commit_sha): +def get_commit(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) - parent = commit.parents[1] - return parent + return commit def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " @@ -105,9 +99,11 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') - run_id = os.getenv("GITHUB_RUN_ID", 0) - commit_sha = os.getenv("GITHUB_SHA", 0) - ref = os.getenv("GITHUB_REF", "") + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -125,12 +121,9 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 
'r') as event_file: - print("Dumping event file") - print(json.load(event_file)) - - parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), parent.sha, state, description, test_results, additional_files) - parent.create_status(context=NAME, description=description, state=state, target_url=report_url) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, state, description, test_results, additional_files) + + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 9d115c030d019abafc4a7410fe97a364bb31f5ad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:00:58 +0300 Subject: [PATCH 044/238] Fix --- .github/workflows/style-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index f2b9fa0f99c..e0e52dfc49c 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,7 +11,7 @@ jobs: runs-on: [self-hosted] steps: - name: Labels check - run: python3 run_check.py + run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From f1cb202339a837dfb0ebc2651b548bddbc0ea356 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:02:09 +0300 Subject: [PATCH 045/238] Fix --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index e0e52dfc49c..b7153d056c5 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -10,6 +10,8 @@ jobs: CheckLabels: runs-on: [self-hosted] steps: + - name: Check out repository code + uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py DockerHubPush: From ea1d8d563078241b51596559c4a2565965b4b090 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:03:15 +0300 Subject: [PATCH 046/238] Fix --- tests/ci/run_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 2f1d97445b5..443096eda2c 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -15,7 +15,6 @@ TRUSTED_ORG_IDS = { OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) DO_NOT_TEST_LABEL = "do not test" -FakePR = namedtuple("FakePR", "number") # Individual trusted contirbutors who are not in any trusted organization. 
# Can be changed in runtime: we will append users that we learned to be in From 3d455b7e9a49af31f97de10d89fad20134b8860f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:04:29 +0300 Subject: [PATCH 047/238] fix --- tests/ci/run_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 443096eda2c..8f7fe3da870 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -4,6 +4,7 @@ import json import requests from pr_info import PRInfo import sys +import logging NAME = 'Run Check (actions)' From 0cdb377b830a865bcb881526fd26088ad6db2e49 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:05:27 +0300 Subject: [PATCH 048/238] Fix --- tests/ci/run_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 8f7fe3da870..61ee1caea30 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,6 +5,7 @@ import requests from pr_info import PRInfo import sys import logging +from github import Github NAME = 'Run Check (actions)' From 1a83fca8808604d6c1fc86874cd161589579f52b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:09:17 +0300 Subject: [PATCH 049/238] FGix --- tests/ci/run_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 61ee1caea30..dc5fec46292 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -113,7 +113,7 @@ if __name__ == "__main__": can_run, description = should_run_checks_for_pr(pr_info) gh = Github(os.getenv("GITHUB_TOKEN")) if not can_run: - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit.create_status(context=NAME, description=description, state="failed", target_url=url) From e90322a68da75e49a85887b974cfbaac03fa7d96 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:10:20 +0300 Subject: [PATCH 050/238] Moar --- tests/ci/run_check.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index dc5fec46292..f6f201be498 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -103,6 +103,10 @@ def should_run_checks_for_pr(pr_info): return True, "No special conditions apply" +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From ea72b603b3f15f9f166fd021f35c301c862023b1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:13:13 +0300 Subject: [PATCH 051/238] Fix --- tests/ci/run_check.py | 6 ++++-- tests/ci/style_check.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index f6f201be498..cb19ca8fc4e 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -115,10 +115,12 @@ if __name__ == "__main__": pr_info = PRInfo(event) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(os.getenv("GITHUB_TOKEN")) if not can_run: task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + print("Commit sha", pr_info.sha) + print("PR number", pr_info.number) + gh = Github(os.getenv("GITHUB_TOKEN")) 
commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - commit.create_status(context=NAME, description=description, state="failed", target_url=url) + commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9e5307ccbdb..04fb166ccbd 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -92,7 +92,6 @@ def update_check_with_curl(check_id): "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) - print("CMD {}", cmd) subprocess.check_call(cmd, shell=True) if __name__ == "__main__": From 21b35374c70edf75beb4a9b426a679ec89d4fd98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:17:41 +0300 Subject: [PATCH 052/238] Add token --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index b7153d056c5..09a22e6fe15 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -14,6 +14,8 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From d5747d2cd9f6895438294ef0661475c60ad9bdc1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:19:02 +0300 Subject: [PATCH 053/238] Bump From dd751cdc7b66495ca08ea13d57b2c75df936973e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:20:56 +0300 Subject: [PATCH 054/238] Fix --- tests/ci/run_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index cb19ca8fc4e..34e09734ddc 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -69,14 +69,14 @@ TRUSTED_CONTRIBUTORS = { def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): if pr_user_login in TRUSTED_CONTRIBUTORS: - logging.info("User '{}' is trusted".format(user)) + logging.info("User '{}' is trusted".format(pr_user_login)) return True - logging.info("User '{}' is not trusted".format(user)) + logging.info("User '{}' is not trusted".format(pr_user_login)) for org_id in pr_user_orgs: if org_id in TRUSTED_ORG_IDS: - logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, user)) + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, pr_user_login)) return True logging.info("Org '{}' is not trusted".format(org_id)) From 43653d7bdca3f35076fb79b9fb6f6100105eef10 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:22:07 +0300 Subject: [PATCH 055/238] Fix run check --- tests/ci/run_check.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 34e09734ddc..7a6c0573e03 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -96,11 +96,6 @@ def should_run_checks_for_pr(pr_info): if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): return False, "Needs 'can be tested' label" - # Stop processing any checks for a PR when Fast Test fails. 
- fast_test_status = pr_info.statuses.get("Fast Test") - if fast_test_status and fast_test_status.state == 'failure': - return False, "Fast Test has failed" - return True, "No special conditions apply" def get_commit(gh, commit_sha): From 91ea5ada95960e8057b57aa4107011f88521ea3a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:31:57 +0300 Subject: [PATCH 056/238] More flexible labels --- .github/workflows/style-check.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 09a22e6fe15..6183e5f4ffb 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,9 +1,12 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy - push: - branches: - - master pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened branches: - master jobs: From a1b8aac1d8422ad868c533657b36cb69593c9963 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:36:48 +0300 Subject: [PATCH 057/238] better --- tests/ci/run_check.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7a6c0573e03..f7a3e894f29 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -110,12 +110,11 @@ if __name__ == "__main__": pr_info = PRInfo(event) can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - print("Commit sha", pr_info.sha) - print("PR number", pr_info.number) - gh = Github(os.getenv("GITHUB_TOKEN")) - commit = get_commit(gh, pr_info.sha) - url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) + else: + commit.create_status(context=NAME, description=description, state="pending", target_url=url) From 2d01dc1a1b6d3424c81de64bf683b996e6712a4b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:38:06 +0300 Subject: [PATCH 058/238] Fix --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 4a18b2a864b..410e01f26af 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,6 +3,7 @@ import requests class PRInfo: def __init__(self, github_event): + print(json.dumps(github_event, indent=4, sort_keys=True)) self.number = github_event['number'] self.sha = github_event['after'] self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) From aebd46f7c9ac49fdc960ab476e7478cef8a29e22 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:39:11 +0300 Subject: [PATCH 059/238] Missed file --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 410e01f26af..285944afd46 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import requests +import json class PRInfo: def __init__(self, github_event): From 1c007643c3d8dd5a11843c9a356d5e4e9ab75459 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:45:44 +0300 Subject: [PATCH 060/238] Trying better --- tests/ci/pr_info.py | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 285944afd46..831cd4f2815 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -5,8 +5,13 @@ import json class PRInfo: def __init__(self, github_event): print(json.dumps(github_event, indent=4, sort_keys=True)) + self.number = github_event['number'] - self.sha = github_event['after'] + if 'after' in github_event: + self.sha = github_event['after'] + else: + self.sha = os.getenv('GITHUB_SHA') + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = github_event['pull_request']['user']['login'] user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) From e696103e3921d8f5780558edd75f48fb2d3cd270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:48:31 +0300 Subject: [PATCH 061/238] Moar --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 831cd4f2815..7eb8af03a1a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import requests import json +import os class PRInfo: def __init__(self, github_event): From 04dc61dfc3d737712d140ffd691673526535f06a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:56:03 +0300 Subject: [PATCH 062/238] More debug --- tests/ci/pr_info.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 7eb8af03a1a..d25215722e4 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -21,3 +21,14 @@ class PRInfo: self.user_orgs = set(org['id'] for org in response_json) else: self.user_orgs = set([]) + + print(self.get_dict()) + + def get_dict(self): + return { + 'sha': self.sha, + 'number': self.number, + 'labels': self.labels, + 'user_login': self.user_login, + 'user_orgs': self.user_orgs, + } From 77df16ea6d07c228ce8913935ffb2cdca8a41428 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 17:12:23 +0300 Subject: [PATCH 063/238] Better --- tests/ci/pr_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index d25215722e4..c213f33fa3a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -11,7 +11,7 @@ class PRInfo: if 'after' in github_event: self.sha = github_event['after'] else: - self.sha = os.getenv('GITHUB_SHA') + self.sha = github_event['pull_request']['head']['sha'] self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = github_event['pull_request']['user']['login'] From 8df8c02c5740cd066003aed168a6706879e9857a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 17:15:02 +0300 Subject: [PATCH 064/238] Check for orgs request --- tests/ci/pr_info.py | 18 +++++++----------- tests/ci/run_check.py | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index c213f33fa3a..eb159051ba2 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -4,9 +4,7 @@ import json import os class PRInfo: - def __init__(self, github_event): - print(json.dumps(github_event, indent=4, sort_keys=True)) - + def __init__(self, github_event, need_orgs=False): self.number = github_event['number'] if 'after' in github_event: self.sha = github_event['after'] @@ -15,14 +13,12 @@ class PRInfo: self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = 
github_event['pull_request']['user']['login'] - user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) - if user_orgs_response.ok: - response_json = user_orgs_response.json() - self.user_orgs = set(org['id'] for org in response_json) - else: - self.user_orgs = set([]) - - print(self.get_dict()) + self.user_orgs = set([]) + if need_orgs: + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) def get_dict(self): return { diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index f7a3e894f29..26e648dae90 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -108,7 +108,7 @@ if __name__ == "__main__": with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) From 1cc7b022b2298e1db169a09dd45dd7625a15aaac Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:32:17 +0300 Subject: [PATCH 065/238] Test --- .github/workflows/style-check.yml | 7 +- tests/ci/docker_images_check.py | 206 ++++++++++++++++++++++++++++++ tests/ci/pr_info.py | 12 +- tests/ci/style_check.py | 5 +- 4 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 tests/ci/docker_images_check.py diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 6183e5f4ffb..fe03f08127f 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -26,7 +26,12 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - run: echo "Hello world" + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCKER_ROBOT_PASSWORD: ${{ secrects.DOCKER_ROBOT_PASSWORD }} Style-Check: needs: DockerHubPush runs-on: [self-hosted] diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py new file mode 100644 index 00000000000..590935ab78b --- /dev/null +++ b/tests/ci/docker_images_check.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +import subprocess +import logging +from report import create_test_html_report +from s3_helper import S3Helper + +NAME = "Push to Dockerhub (actions)" + +def get_changed_docker_images(pr_info, repo_path, image_file_path): + images_dict = {} + path_to_images_file = os.path.join(repo_path, image_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, 'r') as dict_file: + images_dict = json.load(dict_file) + else: + logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + + dockerhub_repo_name = 'yandex' + if not images_dict: + return [], dockerhub_repo_name + + files_changed = pr_info.changed_files + + logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + + changed_images = [] + + for dockerfile_dir, image_description in images_dict.items(): + if image_description['name'].startswith('clickhouse/'): + dockerhub_repo_name = 'clickhouse' + + for f in files_changed: + if f.startswith(dockerfile_dir): + logging.info( + "Found changed file '%s' which 
affects docker image '%s' with path '%s'", + f, image_description['name'], dockerfile_dir) + changed_images.append(dockerfile_dir) + break + + # The order is important: dependents should go later than bases, so that + # they are built with updated base versions. + index = 0 + while index < len(changed_images): + image = changed_images[index] + for dependent in images_dict[image]['dependent']: + logging.info( + "Marking docker image '%s' as changed because it depends on changed docker image '%s'", + dependent, image) + changed_images.append(dependent) + index += 1 + if index > 100: + # Sanity check to prevent infinite loop. + raise "Too many changed docker images, this is a bug." + str(changed_images) + + # If a dependent image was already in the list because its own files + # changed, but then it was added as a dependent of a changed base, we + # must remove the earlier entry so that it doesn't go earlier than its + # base. This way, the dependent will be rebuilt later than the base, and + # will correctly use the updated version of the base. + seen = set() + no_dups_reversed = [] + for x in reversed(changed_images): + if x not in seen: + seen.add(x) + no_dups_reversed.append(x) + + result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] + logging.info("Changed docker images for PR %s @ %s: '%s'", pull_request.number, commit.sha, result) + return result, dockerhub_repo_name + +def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): + logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) + build_log = None + push_log = None + with open('build_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(c.name) + if retcode != 0: + return False, build_log, None + + with open('tag_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + logging.info("Pushing image %s to dockerhub", image_name) + + with open('push_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + push_log = str(pl.stdout.path) + if retcode != 0: + return False, build_log, push_log + + logging.info("Processing of %s successfully finished", image_name) + return True, build_log, push_log + +def process_single_image(versions, path_to_dockerfile_folder, image_name): + logging.info("Image will be pushed with versions %s", ', '.join(all_for_this_image)) + result = [] + for ver in versions: + for i in range(5): + success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + if success: + result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + break + logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + time.sleep(i * 5) + else: + result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + + logging.info("Processing finished") + return result + + +def process_test_results(s3_client, 
test_results, s3_path_prefix): + overall_status = 'success' + processed_test_results = [] + for image, build_log, push_log, status in test_results: + if status != 'OK': + overall_status = 'failure' + url_part = '' + if build_log is not None and os.path.exists(build_log): + build_url = s3_client.upload_test_report_to_s3( + build_log, + s3_path_prefix + "/" + os.path.basename(build_log)) + url_part += 'build_log'.format(build_url) + if push_log is not None and os.path.exists(push_log): + push_url = s3_client.upload_test_report_to_s3( + push_log, + s3_path_prefix + "/" + os.path.basename(push_log)) + if url_part: + url_part += ', ' + url_part += 'push_log'.format(push_url) + if url_part: + test_name = image + ' (' + url_part + ')' + else: + test_name = image + processed_test_results.append((test_name, status)) + return overall_status, processed_test_results + +def upload_results(s3_client, pr_number, commit_sha, test_results): + s3_path_prefix = f"{pr_number}/{commit_sha}/" + NAME.lower().replace(' ', '_') + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, image_file_path) + logging.info("Has changed images %s", ', '.join(changed_images)) + pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + versions = [str(pr_info.number), pr_commit_version] + + subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + + images_processing_result = [] + for rel_path, image_name in changed_images: + full_path = os.path.join(repo_path, rel_path) + images_processing_result += process_single_image(versions, full_path, image) + + if len(changed_images): + description = "Updated " + ','.join([im[1] for im in images]) + else: + description = "Nothing to update" + + if len(description) >= 140: + description = description[:136] + "..." 
+ + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + s3_path_prefix = str(pull_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index eb159051ba2..0a8b0db2254 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,9 +2,12 @@ import requests import json import os +import subprocess +from unidiff import PatchSet + class PRInfo: - def __init__(self, github_event, need_orgs=False): + def __init__(self, github_event, need_orgs=False, need_changed_files=False): self.number = github_event['number'] if 'after' in github_event: self.sha = github_event['after'] @@ -20,6 +23,13 @@ class PRInfo: response_json = user_orgs_response.json() self.user_orgs = set(org['id'] for org in response_json) + self.changed_files = set([]) + if need_changed_files: + diff_url = github_event['pull_request']['diff_url'] + diff = urllib.request.urlopen(github_event['pull_request']['diff_url']) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = set([f.path for f in diff_object]) + def get_dict(self): return { 'sha': self.sha, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 04fb166ccbd..233c7a45470 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -54,11 +54,10 @@ def process_result(result_folder): state, description = "error", "Failed to read test_results.tsv" return state, description, test_results, additional_files -def upload_results(s3_client, pr_number, commit_sha, state, description, test_results, additional_files): +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = f"{pr_number}/{commit_sha}/style_check" additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) - # Add link to help. Anchors in the docs must be adjusted accordingly. 
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" branch_name = "master" if pr_number != 0: @@ -122,7 +121,7 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, state, description, test_results, additional_files) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 0f2a1e957b2e398890e7d74d2d1d86bd8548ae34 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:34:14 +0300 Subject: [PATCH 066/238] Fix check --- .github/workflows/style-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index fe03f08127f..c41e531f2c2 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -31,7 +31,7 @@ jobs: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCKER_ROBOT_PASSWORD: ${{ secrects.DOCKER_ROBOT_PASSWORD }} + DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} Style-Check: needs: DockerHubPush runs-on: [self-hosted] From c1d36e41f3e2fdb3a5d4bdd78e5eae6bc870e187 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:58:36 +0300 Subject: [PATCH 067/238] Docker images check --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 590935ab78b..8866df6e838 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -3,6 +3,7 @@ import subprocess import logging from report import create_test_html_report from s3_helper import S3Helper +import os NAME = "Push to Dockerhub (actions)" From 30d1f4c3adafcd495490f5d9fd7823fe6e552270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:01:16 +0300 Subject: [PATCH 068/238] Docker images check --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 8866df6e838..7bf03427a78 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -3,6 +3,7 @@ import subprocess import logging from report import create_test_html_report from s3_helper import S3Helper +import json import os NAME = "Push to Dockerhub (actions)" From d2a76e32b8cb01fa836618a6a56ab84e50904e0b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:04:48 +0300 Subject: [PATCH 069/238] Docker image --- tests/ci/docker_images_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 7bf03427a78..df4e47705ed 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -5,6 +5,8 @@ from report import create_test_html_report from s3_helper import S3Helper import json import os +from pr_info import PRInfo +from github import Github NAME = "Push to Dockerhub (actions)" @@ -204,5 +206,6 @@ if __name__ == "__main__": url = upload_results(s3_helper, 
pr_info.number, pr_info.sha, test_results) + gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) From 5a750e05fd3865ddf88db7d9508f96779073a69a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:07:10 +0300 Subject: [PATCH 070/238] Fix --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index df4e47705ed..c49e88b1fc7 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -176,7 +176,7 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event) - changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, image_file_path) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join(changed_images)) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha versions = [str(pr_info.number), pr_commit_version] From 9c6723056295a6d2257e2d006545844f3468185f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:11:58 +0300 Subject: [PATCH 071/238] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index c49e88b1fc7..91869f63d43 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -69,7 +69,7 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): no_dups_reversed.append(x) result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] - logging.info("Changed docker images for PR %s @ %s: '%s'", pull_request.number, commit.sha, result) + logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) return result, dockerhub_repo_name def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): From ea6aa62f0e41165cdd2713e575db4d3e44a19fc8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:16:24 +0300 Subject: [PATCH 072/238] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 91869f63d43..0e3eb54852e 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -201,7 +201,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - s3_path_prefix = str(pull_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) From 0115b428ee85e366cbdcbf6cf76b45f3e453514e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:19:34 +0300 Subject: [PATCH 073/238] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0e3eb54852e..854d56b8017 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -202,7 +202,7 @@ if __name__ == 
"__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) + status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) From 0924da80d7e882f334cb77970ade644cea44727f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:22:27 +0300 Subject: [PATCH 074/238] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 854d56b8017..0c2ebb52908 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -158,7 +158,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) + html_report = create_test_html_report(NAME, test_results, "https://hub.docker.com/u/clickhouse", task_url, branch_url, branch_name, commit_url) with open('report.html', 'w') as f: f.write(html_report) From 735716a2851118150ae2010520a7f3ce8d01aabc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:25:12 +0300 Subject: [PATCH 075/238] More fixes --- tests/ci/docker_images_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0c2ebb52908..b046b8c9089 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -166,6 +166,11 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) From 71dcf03a0f83c8126f5b9153ca202248d3893837 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:35:15 +0300 Subject: [PATCH 076/238] Create changed image --- docker/test/style/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 424bfe71b15..0118e6df764 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# yaml check is not the best one + cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv ./check-style -n |& tee /test_output/style_output.txt ./check-typos |& tee /test_output/typos_output.txt From 56499fb7ca56d6c8a51640db01883ae237b2789b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:47:04 +0300 Subject: [PATCH 077/238] Track changed files --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index b046b8c9089..6a9d1ba79c9 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -180,7 +180,7 @@ if __name__ == "__main__": with 
open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(event, False, True) changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join(changed_images)) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha From 6245cc8b6aee5fe78ffa0282e13e67b626ab3404 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:48:39 +0300 Subject: [PATCH 078/238] Track changed files --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 0a8b0db2254..8feedb2d4d7 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,6 +3,7 @@ import requests import json import os import subprocess +import urllib from unidiff import PatchSet From 2ac210d63bc2abdd28b55123804737691c482a1a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:55:21 +0300 Subject: [PATCH 079/238] One more --- tests/ci/docker_images_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 6a9d1ba79c9..92353bb4386 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -103,7 +103,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri return True, build_log, push_log def process_single_image(versions, path_to_dockerfile_folder, image_name): - logging.info("Image will be pushed with versions %s", ', '.join(all_for_this_image)) + logging.info("Image will be pushed with versions %s", ', '.join(versions)) result = [] for ver in versions: for i in range(5): @@ -182,7 +182,7 @@ if __name__ == "__main__": pr_info = PRInfo(event, False, True) changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") - logging.info("Has changed images %s", ', '.join(changed_images)) + logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha versions = [str(pr_info.number), pr_commit_version] @@ -191,7 +191,7 @@ if __name__ == "__main__": images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) - images_processing_result += process_single_image(versions, full_path, image) + images_processing_result += process_single_image(versions, full_path, image_name) if len(changed_images): description = "Updated " + ','.join([im[1] for im in images]) From d517ac3fce992f96a557bd0f59d7dd08b3a92f8a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:59:06 +0300 Subject: [PATCH 080/238] Fix build --- tests/ci/docker_images_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 92353bb4386..222b5ae3723 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -76,14 +76,14 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) build_log = None push_log = None - with open('build_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t 
{im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() build_log = str(c.name) if retcode != 0: return False, build_log, None - with open('tag_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() build_log = str(pl.name) @@ -92,7 +92,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri logging.info("Pushing image %s to dockerhub", image_name) - with open('push_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() push_log = str(pl.stdout.path) From 3fedd11c0bf41480be8fe1aef0c9b19261916381 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:01:36 +0300 Subject: [PATCH 081/238] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 222b5ae3723..06162fe4624 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -79,7 +79,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(c.name) + build_log = str(pl.name) if retcode != 0: return False, build_log, None From 7a4ff98612d8854de9da90cd80121b5d57bae43e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:05:02 +0300 Subject: [PATCH 082/238] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 06162fe4624..c4532e449f5 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -95,7 +95,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - push_log = str(pl.stdout.path) + push_log = str(pl.name) if retcode != 0: return False, build_log, push_log From 340d24d07b41e108ebbee1ff7104221578211758 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:07:42 +0300 Subject: [PATCH 083/238] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index c4532e449f5..0e8414e6df5 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -194,7 +194,7 @@ if __name__ == "__main__": images_processing_result += process_single_image(versions, full_path, image_name) 
if len(changed_images): - description = "Updated " + ','.join([im[1] for im in images]) + description = "Updated " + ','.join([im[1] for im in changed_images]) else: description = "Nothing to update" From b5aca2265be2a996f30ea5a26f0beea55d49b0a3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:26:48 +0300 Subject: [PATCH 084/238] Trying to path images --- .github/workflows/style-check.yml | 10 ++++++++++ tests/ci/docker_images_check.py | 7 +++++++ tests/ci/style_check.py | 15 +++++++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index c41e531f2c2..4bfffcf9f15 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -32,10 +32,20 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/changed_images.json Style-Check: needs: DockerHubPush runs-on: [self-hosted] steps: + - name: Download math result for job 1 + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/changed_images.json - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0e8414e6df5..284406466a9 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -184,20 +184,24 @@ if __name__ == "__main__": changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + versions = [str(pr_info.number), pr_commit_version] subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + result_images = {} images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) images_processing_result += process_single_image(versions, full_path, image_name) + result_images[image_name] = pr_commit_version if len(changed_images): description = "Updated " + ','.join([im[1] for im in changed_images]) else: description = "Nothing to update" + if len(description) >= 140: description = description[:136] + "..." 
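
Note: the workflow half of this patch is what makes the handoff work. The DockerHubPush job writes changed_images.json into its temp directory and publishes it as an artifact, and the downstream job downloads that artifact before invoking style_check.py. A minimal sketch of the pairing, assuming the artifact name and temp-dir layout used in this series (actions/download-artifact treats `path` as a destination directory, a detail patches 085-086 below still have to work out):

    # producer job (DockerHubPush)
    - name: Upload images files to artifacts
      uses: actions/upload-artifact@v2
      with:
        name: changed_images
        path: ${{ runner.temp }}/docker_images_check/changed_images.json

    # consumer job (declares `needs: DockerHubPush`)
    - name: Download changed images
      uses: actions/download-artifact@v2
      with:
        name: changed_images
        path: ${{ runner.temp }}/style_check
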
@@ -214,3 +218,6 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) + + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + json.dump(result_images, images_file) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 233c7a45470..9721fe60b18 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -107,7 +107,18 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) - docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/style-check' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.loads(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/style-check' in images: + docker_image += ':' + images['clickhouse/style-check'] + + logging.info("Got docker image %s", docker_image) + if not aws_secret_key_id or not aws_secret_key: logging.info("No secrets, will not upload anything to S3") @@ -119,7 +130,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) From fc43998c944c42bfc4c9c6ab1346d121f705c5cf Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:48:06 +0300 Subject: [PATCH 085/238] Fixes --- .github/workflows/style-check.yml | 4 ++-- tests/ci/docker_images_check.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 4bfffcf9f15..da2b433a62b 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -36,7 +36,7 @@ jobs: uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/changed_images.json + path: ${{ runner.temp }}/docker_images_check/changed_images.json Style-Check: needs: DockerHubPush runs-on: [self-hosted] @@ -45,7 +45,7 @@ jobs: uses: actions/download-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/changed_images.json + path: ${{ runner.temp }}/style_check/changed_images.json - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 284406466a9..254efa9e94a 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,6 +177,12 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) From e9364fc0752309c212782eadedef24eed6129ece Mon Sep 17 00:00:00 2001 From: alesapin 
Date: Wed, 15 Sep 2021 21:52:37 +0300 Subject: [PATCH 086/238] One more time --- .github/workflows/style-check.yml | 2 +- tests/ci/docker_images_check.py | 3 --- tests/ci/style_check.py | 9 +++------ 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index da2b433a62b..1a7ceb323cb 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -45,7 +45,7 @@ jobs: uses: actions/download-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/style_check/changed_images.json + path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 254efa9e94a..bb64474c878 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,9 +177,6 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') - if os.path.exists(temp_path): - shutil.rmtree(temp_path) - if not os.path.exists(temp_path): os.makedirs(temp_path) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9721fe60b18..008e3e88490 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -102,6 +102,9 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event) + if not os.path.exists(temp_path): + os.makedirs(temp_path) + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -124,12 +127,6 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - if os.path.exists(temp_path): - shutil.rmtree(temp_path) - - if not os.path.exists(temp_path): - os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) From fbd7cc4f690fdc693ee83789e2cf68fa6318e67d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:54:20 +0300 Subject: [PATCH 087/238] Followup --- .github/workflows/style-check.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 1a7ceb323cb..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -41,7 +41,7 @@ jobs: needs: DockerHubPush runs-on: [self-hosted] steps: - - name: Download math result for job 1 + - name: Download changed images uses: actions/download-artifact@v2 with: name: changed_images diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 008e3e88490..9ab27bb22a5 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -115,7 +115,7 @@ if __name__ == "__main__": if os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r') as images_fd: - images = json.loads(images_fd) + images = json.load(images_fd) logging.info("Got images %s", images) if 'clickhouse/style-check' in images: docker_image += ':' + images['clickhouse/style-check'] From 
25171f8e0768941d4c96e1a850042a63cdbb5ea9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:57:48 +0300 Subject: [PATCH 088/238] Almost there --- tests/ci/style_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9ab27bb22a5..9ff9d7e54ac 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -111,14 +111,14 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) images_path = os.path.join(temp_path, 'changed_images.json') - docker_image = 'clickhouse/style-check' + docker_image = 'clickhouse/style-test' if os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r') as images_fd: images = json.load(images_fd) logging.info("Got images %s", images) - if 'clickhouse/style-check' in images: - docker_image += ':' + images['clickhouse/style-check'] + if 'clickhouse/style-test' in images: + docker_image += ':' + images['clickhouse/style-test'] logging.info("Got docker image %s", docker_image) From bc1a0b79354626b9e8d6d4d5a5b13a47d9c425ba Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 11:33:55 +0300 Subject: [PATCH 089/238] Branding? --- .github/workflows/style-check.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index cde033d4d91..08c96d2398d 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,4 +1,7 @@ name: Ligthweight GithubActions +branding: + icon: 'award' + color: 'green' on: # yamllint disable-line rule:truthy pull_request: types: From e471cdce4999e6b618eb617dbb969142cc8ea265 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:26:49 +0300 Subject: [PATCH 090/238] Trying reports --- .github/workflows/style-check.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 08c96d2398d..5f8032b43d3 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,7 +1,4 @@ name: Ligthweight GithubActions -branding: - icon: 'award' - color: 'green' on: # yamllint disable-line rule:truthy pull_request: types: @@ -22,6 +19,8 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Just Checking + run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From cbc34c66b0577b6a55c4e22413426331cb99fb33 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:33:38 +0300 Subject: [PATCH 091/238] Trying annotations --- .github/workflows/style-check.yml | 2 -- tests/ci/docker_images_check.py | 2 ++ tests/ci/style_check.py | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 5f8032b43d3..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -19,8 +19,6 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Just Checking - run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index bb64474c878..a7901b5fda8 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -224,3 +224,5 @@ if __name__ == "__main__": with 
open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) + + print("::notice ::Report url: {}".format(url)) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9ff9d7e54ac..4a75eee70ee 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,6 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=state, target_url=report_url) + print("::notice ::Report url: {}".format(report_url)) From a2772f4304757ef6da7a4b466820da9065a5dd9f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:37:39 +0300 Subject: [PATCH 092/238] Maybe supports html --- tests/ci/docker_images_check.py | 2 +- tests/ci/run_check.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a7901b5fda8..639f19e6973 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -225,4 +225,4 @@ if __name__ == "__main__": with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) - print("::notice ::Report url: {}".format(url)) + print("::notice ::
<b>Report url</b>: {}".format(url))
diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index 26e648dae90..788bfc5b5b1 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -114,7 +114,9 @@ if __name__ == "__main__":
     commit = get_commit(gh, pr_info.sha)
     url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
     if not can_run:
+        print("::notice ::<b>Cannot run</b>")
         commit.create_status(context=NAME, description=description, state="failure", target_url=url)
         sys.exit(1)
     else:
+        print("::notice ::<b>Can run</b>
") commit.create_status(context=NAME, description=description, state="pending", target_url=url) From dc3396a2cfc3bf6f676f32cf6feb15197200e936 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:41:13 +0300 Subject: [PATCH 093/238] Branding --- .github/workflows/style-check.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index cde033d4d91..3f691242acc 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,6 +11,9 @@ on: # yamllint disable-line rule:truthy - master jobs: CheckLabels: + branding: + icon: 'award' + color: 'green' runs-on: [self-hosted] steps: - name: Check out repository code @@ -19,6 +22,7 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From e439532f905ad14c8f7e17b65e328059328a127d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:43:19 +0300 Subject: [PATCH 094/238] Followup --- .github/workflows/style-check.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 3f691242acc..6d8013c2e8a 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -22,7 +22,6 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From 42da0d71788defe521585ec4086a50beef3a0a4b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:04:37 +0300 Subject: [PATCH 095/238] Trying split actions --- .github/workflows/docker-hub-action.yml | 18 ++++++++++++++++++ .github/workflows/style-check.yml | 7 +------ 2 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/docker-hub-action.yml diff --git a/.github/workflows/docker-hub-action.yml b/.github/workflows/docker-hub-action.yml new file mode 100644 index 00000000000..924c4692e57 --- /dev/null +++ b/.github/workflows/docker-hub-action.yml @@ -0,0 +1,18 @@ +name: 'DockerHubPush' +description: 'Action push images to dockerhub' +runs: + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 6d8013c2e8a..72ea5a8a27d 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,9 +11,6 @@ on: # yamllint disable-line rule:truthy - master jobs: CheckLabels: - branding: - icon: 'award' - color: 'green' runs-on: [self-hosted] steps: - name: Check out repository code @@ -26,10 +23,8 @@ jobs: needs: CheckLabels runs-on: [self-hosted] steps: - - name: Check out repository code - uses: actions/checkout@v2 - name: Images check - run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + uses: .github/workflows 
env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From fd0a4bdd67f70dff65f11b9408b9e7c19632565a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:05:59 +0300 Subject: [PATCH 096/238] Add on --- .github/workflows/docker-hub-action.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/docker-hub-action.yml b/.github/workflows/docker-hub-action.yml index 924c4692e57..66d969ec29f 100644 --- a/.github/workflows/docker-hub-action.yml +++ b/.github/workflows/docker-hub-action.yml @@ -1,5 +1,15 @@ name: 'DockerHubPush' description: 'Action push images to dockerhub' +on: # yamllint disable-line rule:truthy + pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened + branches: + - master runs: steps: - name: Check out repository code From bafad0fb09962806ca7596655c52b3cc0f7e7576 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:07:10 +0300 Subject: [PATCH 097/238] Trying --- .github/{workflows => actions}/docker-hub-action.yml | 0 .github/workflows/style-check.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{workflows => actions}/docker-hub-action.yml (100%) diff --git a/.github/workflows/docker-hub-action.yml b/.github/actions/docker-hub-action.yml similarity index 100% rename from .github/workflows/docker-hub-action.yml rename to .github/actions/docker-hub-action.yml diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 72ea5a8a27d..72d6d104a2b 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -24,7 +24,7 @@ jobs: runs-on: [self-hosted] steps: - name: Images check - uses: .github/workflows + uses: .github/actions env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From 5d3c92dcaf0cc5d0263e131f7f05ada78e97d17a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:11:35 +0300 Subject: [PATCH 098/238] No idea --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 72d6d104a2b..29943ce30ee 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -23,6 +23,8 @@ jobs: needs: CheckLabels runs-on: [self-hosted] steps: + - name: Check out repository code + uses: actions/checkout@v2 - name: Images check uses: .github/actions env: From 0c68a7c1a9fc13441dd4040a225b2e9de5333cf7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:15:14 +0300 Subject: [PATCH 099/238] Don't give up --- .../{docker-hub-action.yml => docker-hub-push/action.yml} | 0 .github/workflows/style-check.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/actions/{docker-hub-action.yml => docker-hub-push/action.yml} (100%) diff --git a/.github/actions/docker-hub-action.yml b/.github/actions/docker-hub-push/action.yml similarity index 100% rename from .github/actions/docker-hub-action.yml rename to .github/actions/docker-hub-push/action.yml diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 29943ce30ee..37f48fdfc29 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -26,7 +26,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - uses: .github/actions + uses: 
./.github/actions/docker-hub-push env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From 52d6d44370a10498730f90dbe86f72977948646b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:20:39 +0300 Subject: [PATCH 100/238] Shell bash --- .github/actions/docker-hub-push/action.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/actions/docker-hub-push/action.yml b/.github/actions/docker-hub-push/action.yml index 66d969ec29f..9352d158a3b 100644 --- a/.github/actions/docker-hub-push/action.yml +++ b/.github/actions/docker-hub-push/action.yml @@ -16,11 +16,7 @@ runs: uses: actions/checkout@v2 - name: Images check run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + shell: bash - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: From 1bfcbe281cceeba679454fff37e1d3a1f09ff107 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:26:45 +0300 Subject: [PATCH 101/238] Trying run --- .github/actions/docker-hub-push/action.yml | 24 ------------------- .../workflows/{style-check.yml => main.yml} | 2 +- tests/ci/run_check.py | 4 ++-- tests/ci/style_check.py | 2 +- 4 files changed, 4 insertions(+), 28 deletions(-) delete mode 100644 .github/actions/docker-hub-push/action.yml rename .github/workflows/{style-check.yml => main.yml} (95%) diff --git a/.github/actions/docker-hub-push/action.yml b/.github/actions/docker-hub-push/action.yml deleted file mode 100644 index 9352d158a3b..00000000000 --- a/.github/actions/docker-hub-push/action.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: 'DockerHubPush' -description: 'Action push images to dockerhub' -on: # yamllint disable-line rule:truthy - pull_request: - types: - - labeled - - unlabeled - - synchronize - - reopened - - opened - branches: - - master -runs: - steps: - - name: Check out repository code - uses: actions/checkout@v2 - - name: Images check - run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - shell: bash - - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json diff --git a/.github/workflows/style-check.yml b/.github/workflows/main.yml similarity index 95% rename from .github/workflows/style-check.yml rename to .github/workflows/main.yml index 37f48fdfc29..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/main.yml @@ -26,7 +26,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - uses: ./.github/actions/docker-hub-push + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 788bfc5b5b1..87dc21beda2 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -114,9 +114,9 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - print("::notice ::
<b>Cannot run</b>")
+        print("::notice ::Cannot run")
         commit.create_status(context=NAME, description=description, state="failure", target_url=url)
         sys.exit(1)
     else:
-        print("::notice ::<b>Can run</b>
") + print("::notice ::**Can run**") commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4a75eee70ee..3438e40a5b4 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,4 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - print("::notice ::Report url: {}".format(report_url)) + print("::notice ::Report *url*: {} and one more [test]({})".format(report_url, report_url)) From 8b0a85fd90dfa36a3619e34a972e7481f3aed704 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:39:36 +0300 Subject: [PATCH 102/238] Remove --- tests/ci/docker_images_check.py | 2 +- tests/ci/run_check.py | 4 ++-- tests/ci/style_check.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 639f19e6973..a7901b5fda8 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -225,4 +225,4 @@ if __name__ == "__main__": with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) - print("::notice ::
<b>Report url</b>
: {}".format(url)) + print("::notice ::Report url: {}".format(url)) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 87dc21beda2..70b3ae2ac07 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -114,9 +114,9 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - print("::notice ::**Cannot run**") + print("::notice ::Cannot run") commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: - print("::notice ::**Can run**") + print("::notice ::Can run") commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 3438e40a5b4..4a75eee70ee 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,4 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - print("::notice ::Report *url*: {} and one more [test]({})".format(report_url, report_url)) + print("::notice ::Report url: {}".format(report_url)) From 2a74979c3aecf325d5233b97fde7658476fdc55d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:52:04 +0300 Subject: [PATCH 103/238] Trying output --- tests/ci/docker_images_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a7901b5fda8..b9353a0a44f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,6 +177,9 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -226,3 +229,5 @@ if __name__ == "__main__": json.dump(result_images, images_file) print("::notice ::Report url: {}".format(url)) + print("::set-output name=url_output::\"{}\"".format(url)) +) From 7aac1e29b9931b07185dc84c2d76179a135c5aa6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:54:57 +0300 Subject: [PATCH 104/238] Remove --- tests/ci/docker_images_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index b9353a0a44f..bff229e15ff 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -230,4 +230,3 @@ if __name__ == "__main__": print("::notice ::Report url: {}".format(url)) print("::set-output name=url_output::\"{}\"".format(url)) -) From 755e4d2e9e6d8c973dd57eb4faabe914a929adcc Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 14:17:26 +0300 Subject: [PATCH 105/238] Actions --- tests/ci/style_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4a75eee70ee..47b12c11173 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -131,3 +131,5 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, 
test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) From ec7d83c9e080353b20cb68800a483596af222ee2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 14:41:07 +0300 Subject: [PATCH 106/238] Fix style check --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 47b12c11173..5b2c2258585 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -132,4 +132,4 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=url) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From d25b8881e4c30163abc1099db89365740e1ff2f8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:12:58 +0300 Subject: [PATCH 107/238] More copypaste --- .github/workflows/main.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cde033d4d91..2ff22f0fee6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,3 +54,21 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + PVS-Check: + needs: DockerHubPush + runs-on: [self-hosted] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/pvs_check + - name: Check out repository code + uses: actions/checkout@v2 + - name: PVS Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 pvs_check.py From 1029cb3095634d4a883ffbc870f6809390e4631c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:17:47 +0300 Subject: [PATCH 108/238] Add PVS check --- tests/ci/pvs_check.py | 137 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 tests/ci/pvs_check.py diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py new file mode 100644 index 00000000000..4c2957770e9 --- /dev/null +++ b/tests/ci/pvs_check.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +import subprocess +import os +import json +import logging +from github import Github +from report import create_test_html_report +from s3_helper import S3Helper +from pr_info import PRInfo +import shutil +import sys + +NAME = 'PVS Studio (actions)' +LICENSE_NAME = 'Free license: ClickHouse, Yandex' +HTML_REPORT_FOLDER = 'pvs-studio-html-report' +TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + +def _process_txt_report(self, path): + warnings = [] + errors = [] + with 
open(path, 'r') as report_file: + for line in report_file: + if 'viva64' in line: + continue + elif 'warn' in line: + warnings.append(':'.join(line.split('\t')[0:2])) + elif 'err' in line: + errors.append(':'.join(line.split('\t')[0:2])) + return warnings, errors + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): + s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + gh = Github(os.getenv("GITHUB_TOKEN")) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/pvs-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/pvs-test' in images: + docker_image += ':' + images['clickhouse/pvs-test'] + + logging.info("Got docker image %s", docker_image) + + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + licence_key = os.getenv('PVS_STUDIO_KEY') + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' -e CC=clang-11 -e CXX=clang++-11 {docker_image}" + + subprocess.check_output(cmd, shell=True) + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None + + commit = get_commit(gh, pr_info.sha) + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break + + if not index_html: + commit.create_status(context=NAME, description='PVS 
report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings + + status = 'success' + test_results = [(index_html, "Look at the report")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) From 0ea203da310a753f456fb75329d22baceacf7b05 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:24:24 +0300 Subject: [PATCH 109/238] Checkout submodules for PVS --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2ff22f0fee6..025ceea8c50 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -65,6 +65,8 @@ jobs: path: ${{ runner.temp }}/pvs_check - name: Check out repository code uses: actions/checkout@v2 + with: + submodules: true - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} From a895375893f7fd1575d4ec7cd6a430211f6e81f0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:28:11 +0300 Subject: [PATCH 110/238] Something strange --- tests/ci/pvs_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 4c2957770e9..9aabb5f734c 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -127,7 +127,7 @@ if __name__ == "__main__": errors = errors + warnings status = 'success' - test_results = [(index_html, "Look at the report")] + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] description = "Total errors {}".format(len(errors)) additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) From a70d4d13dfec21907a58dea4706013878c5c017c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:44:42 +0300 Subject: [PATCH 111/238] Trying one more time --- .github/workflows/main.yml | 2 +- tests/ci/pvs_check.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 025ceea8c50..4e5c2ed19e9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,7 +66,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - submodules: true + submodules: 'recursive' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 9aabb5f734c..1becffedac2 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,9 +104,13 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' -e CC=clang-11 -e 
CXX=clang++-11 {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" - subprocess.check_output(cmd, shell=True) + try: + subprocess.check_output(cmd, shell=True) + except: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) From fd4afa236b3756888b4018942a92b6cc2c28f925 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:02:40 +0300 Subject: [PATCH 112/238] Fix licence name --- tests/ci/pvs_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 1becffedac2..b52b0b32f1b 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,7 +104,8 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + commit = get_commit(gh, pr_info.sha) try: subprocess.check_output(cmd, shell=True) @@ -116,7 +117,6 @@ if __name__ == "__main__": html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) index_html = None - commit = get_commit(gh, pr_info.sha) for url in html_urls: if 'index.html' in url: index_html = 'HTML report'.format(url) From ed07b085de10b3f5131aec0e80ffc7b51df6382f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:04:47 +0300 Subject: [PATCH 113/238] Trying to fix --- .github/workflows/main.yml | 1 + tests/ci/pvs_check.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4e5c2ed19e9..245f76eb3b6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,6 +67,7 @@ jobs: uses: actions/checkout@v2 with: submodules: 'recursive' + path: 'repo_with_submodules' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index b52b0b32f1b..516f4c16e41 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,7 +71,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), "repo_with_submodules") temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: From e3e69825183322d685c90d9909fdd151c81a1764 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:05:04 +0300 Subject: [PATCH 114/238] Followup --- tests/ci/pvs_check.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 516f4c16e41..3778a6e3110 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,7 +104,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) try: From 68480a659e942e276d248e073f23d164c3f1ffdf Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:14:24 +0300 Subject: [PATCH 115/238] Followup --- tests/ci/pvs_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 3778a6e3110..c7f07a34e32 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -11,7 +11,7 @@ import shutil import sys NAME = 'PVS Studio (actions)' -LICENSE_NAME = 'Free license: ClickHouse, Yandex' +LICENCE_NAME = 'Free license: ClickHouse, Yandex' HTML_REPORT_FOLDER = 'pvs-studio-html-report' TXT_REPORT_NAME = 'pvs-studio-task-report.txt' From 736673bf08ca899c7e42280ebe598e58a516b57f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:52:16 +0300 Subject: [PATCH 116/238] Moar --- tests/ci/pvs_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index c7f07a34e32..75febd9cd49 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -26,7 +26,7 @@ def process_logs(s3_client, additional_logs, s3_path_prefix): return additional_urls -def _process_txt_report(self, path): +def _process_txt_report(path): warnings = [] errors = [] with open(path, 'r') as report_file: @@ -114,7 +114,7 @@ if __name__ == "__main__": sys.exit(1) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) index_html = None for url in html_urls: From 8b1be85bcbe771e7c28fd3bd4a4b199ff202705a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 18:51:43 +0300 Subject: [PATCH 117/238] Trying other way --- .github/workflows/main.yml | 2 ++ tests/ci/pvs_check.py | 46 +++++++++++++++++++++----------------- tests/ci/s3_helper.py | 2 +- tests/ci/style_check.py | 2 +- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 245f76eb3b6..a1d6cf05fd6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,6 +48,8 @@ jobs: path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 + with: + path: 'repo_without_submodules' - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 75febd9cd49..8bc6df632f2 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -113,29 +113,33 @@ if __name__ == "__main__": commit.create_status(context=NAME, 
description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") sys.exit(1) - s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) - index_html = None + try: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None - for url in html_urls: - if 'index.html' in url: - index_html = 'HTML report'.format(url) - break + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break - if not index_html: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") - sys.exit(1) + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) - txt_report = os.path.join(temp_path, TXT_REPORT_NAME) - warnings, errors = _process_txt_report(txt_report) - errors = errors + warnings + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings - status = 'success' - test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] - description = "Total errors {}".format(len(errors)) - additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + status = 'success' + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) - print("::notice ::Report url: {}".format(report_url)) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + except Exception as ex: + print("Got an exception", ex) + sys.exit(1) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 8a170da44f8..b9ae0de6e02 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -71,7 +71,7 @@ class S3Helper(object): if not files: return [] - p = Pool(min(len(files), 30)) + p = Pool(min(len(files), 5)) def task(file_name): full_fs_path = os.path.join(folder_path, file_name) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 5b2c2258585..f41120f7de7 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -95,7 +95,7 @@ def update_check_with_curl(check_id): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), 'repo_without_submodules') temp_path = 
os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: From c6f9c8e7ba992f4eab909f5b94d179442550ec06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 18:54:40 +0300 Subject: [PATCH 118/238] Missed file --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index bff229e15ff..141d075cc6d 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -7,6 +7,7 @@ import json import os from pr_info import PRInfo from github import Github +import shutil NAME = "Push to Dockerhub (actions)" From fce1d7e156502cf71a16b0f8ff6001c209bfd44c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 22:35:38 +0300 Subject: [PATCH 119/238] Fix stupid bug --- tests/ci/pvs_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 8bc6df632f2..18cee175970 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -123,9 +123,9 @@ if __name__ == "__main__": index_html = 'HTML report'.format(url) break - if not index_html: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") - sys.exit(1) + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) txt_report = os.path.join(temp_path, TXT_REPORT_NAME) warnings, errors = _process_txt_report(txt_report) From 6556e77eb42771791e5eb33840433a4772faf4ac Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 10:51:24 +0300 Subject: [PATCH 120/238] Test From d0d4318624e7ddc4f9c24ac4ea56c44c875b94d4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 11:20:56 +0300 Subject: [PATCH 121/238] Use correct user --- .github/workflows/main.yml | 3 --- tests/ci/pvs_check.py | 11 +++++++++-- tests/ci/style_check.py | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a1d6cf05fd6..4e5c2ed19e9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,8 +48,6 @@ jobs: path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 - with: - path: 'repo_without_submodules' - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} @@ -69,7 +67,6 @@ jobs: uses: actions/checkout@v2 with: submodules: 'recursive' - path: 'repo_with_submodules' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 18cee175970..f68e5ca8210 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,7 +71,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), "repo_with_submodules") + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: @@ -81,6 
+81,13 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + new_repo_path = os.path.join(temp_path, repo_path) + if os.path.exists(new_repo_path): + shutil.rmtree(new_repo_path) + shutil.copytree(repo_path, temp_path) + # this check modify repository so copy it to the temp directory + repo_path = new_repo_path + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -104,7 +111,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) try: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index f41120f7de7..2af8514fbfc 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -95,7 +95,7 @@ def update_check_with_curl(check_id): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), 'repo_without_submodules') + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: @@ -127,7 +127,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) From 18f282858e9bda66ba314fc3f6a9636fdb197b47 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 12:41:31 +0300 Subject: [PATCH 122/238] Add logging --- tests/ci/pvs_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index f68e5ca8210..a63b87d1d59 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -82,11 +82,16 @@ if __name__ == "__main__": os.makedirs(temp_path) new_repo_path = os.path.join(temp_path, repo_path) + logging.info("Will try to copy repo to %s", new_repo_path) if os.path.exists(new_repo_path): + logging.info("Removing old copy") shutil.rmtree(new_repo_path) + + logging.info("Copy repo from %s (exists %s) to %s", repo_path, os.path.exists(repo_path), temp_path) shutil.copytree(repo_path, temp_path) # this check modify repository so copy it to the temp directory repo_path = new_repo_path + logging.info("Repo copy path %s", repo_path) aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = 
os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") From d510bfbe636c9160a370be4800d790f9570db5f8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 12:59:31 +0300 Subject: [PATCH 123/238] Better --- .github/workflows/main.yml | 4 +++- tests/ci/pvs_check.py | 15 +-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4e5c2ed19e9..2fb0e54a8ee 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -73,4 +73,6 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} - run: cd $GITHUB_WORKSPACE/tests/ci && python3 pvs_check.py + TEMP_PATH: ${{runner.temp}}/pvs_check + REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse + run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index a63b87d1d59..c254ad74ae4 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,26 +71,13 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) + repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) pr_info = PRInfo(event) - - if not os.path.exists(temp_path): - os.makedirs(temp_path) - - new_repo_path = os.path.join(temp_path, repo_path) - logging.info("Will try to copy repo to %s", new_repo_path) - if os.path.exists(new_repo_path): - logging.info("Removing old copy") - shutil.rmtree(new_repo_path) - - logging.info("Copy repo from %s (exists %s) to %s", repo_path, os.path.exists(repo_path), temp_path) - shutil.copytree(repo_path, temp_path) # this check modify repository so copy it to the temp directory - repo_path = new_repo_path logging.info("Repo copy path %s", repo_path) aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") From 68c7f8638ad82a1058f9dacde4cdb64583fd72ff Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 13:34:45 +0300 Subject: [PATCH 124/238] update docker image --- docker/test/pvs/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index f4675d35819..4eeb9855274 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,6 +37,8 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" +ENV CCACHE_DIR=/test_ouput/ccache + CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ From 5d35ba7d9353e842e435aac372d389b91bc51c70 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 13:41:29 +0300 Subject: [PATCH 125/238] One more time --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 4eeb9855274..06c2c424a74 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -39,7 +39,7 @@ RUN set -x \ ENV CCACHE_DIR=/test_ouput/ccache -CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ From 00d9a62d6cf400bc48c4514c9b1be69e7f3e719f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 14:17:50 +0300 Subject: [PATCH 126/238] Missclick --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 06c2c424a74..77cbd910922 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,7 +37,7 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" -ENV CCACHE_DIR=/test_ouput/ccache +ENV CCACHE_DIR=/test_output/ccache CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ From 5c2a724a4d616f5ee17884dd9bfd9bfd312aabf4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 27 Sep 2021 11:18:01 +0300 Subject: [PATCH 127/238] Add init worker script --- tests/ci/init_worker.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/ci/init_worker.sh diff --git a/tests/ci/init_worker.sh b/tests/ci/init_worker.sh new file mode 100644 index 00000000000..44cfc89f758 --- /dev/null +++ b/tests/ci/init_worker.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -euo pipefail + +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +./bin/installdependencies.sh From 214272113f9d979331fabbad1bd8022b8a382710 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 14:52:51 +0300 Subject: [PATCH 128/238] Add lambda code for token rotation --- tests/ci/lambda/Dockerfile | 13 ++++ tests/ci/lambda/app.py | 106 +++++++++++++++++++++++++++++++ tests/ci/lambda/requirements.txt | 3 + 3 files changed, 122 insertions(+) create mode 100644 tests/ci/lambda/Dockerfile create mode 100644 tests/ci/lambda/app.py create mode 100644 tests/ci/lambda/requirements.txt diff --git a/tests/ci/lambda/Dockerfile b/tests/ci/lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
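+# (Note: pip's --target "${LAMBDA_TASK_ROOT}" below vendors the dependencies
+# next to app.py, the convention AWS documents for container-image Lambdas,
+# so imports resolve the same way they would in a .zip deployment package.)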
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
+
+# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
+CMD [ "app.handler" ]
diff --git a/tests/ci/lambda/app.py b/tests/ci/lambda/app.py
new file mode 100644
index 00000000000..4edd3e8d08c
--- /dev/null
+++ b/tests/ci/lambda/app.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+
+import requests
+import argparse
+import jwt
+import sys
+import json
+import time
+
+def get_installation_id(jwt_token):
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Accept": "application/vnd.github.v3+json",
+    }
+    response = requests.get("https://api.github.com/app/installations", headers=headers)
+    response.raise_for_status()
+    data = response.json()
+    return data[0]['id']
+
+def get_access_token(jwt_token, installation_id):
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Accept": "application/vnd.github.v3+json",
+    }
+    response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
+    response.raise_for_status()
+    data = response.json()
+    return data['token']
+
+def get_runner_registration_token(access_token):
+    headers = {
+        "Authorization": f"token {access_token}",
+        "Accept": "application/vnd.github.v3+json",
+    }
+    response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers)
+    response.raise_for_status()
+    data = response.json()
+    return data['token']
+
+def get_key_and_app_from_aws():
+    import boto3
+    secret_name = "clickhouse_github_secret_key_1"
+    session = boto3.session.Session()
+    client = session.client(
+        service_name='secretsmanager',
+    )
+    get_secret_value_response = client.get_secret_value(
+        SecretId=secret_name
+    )
+    data = json.loads(get_secret_value_response['SecretString'])
+    return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
+
+
+def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
+    payload = {
+        "iat": int(time.time()) - 60,
+        "exp": int(time.time()) + (10 * 60),
+        "iss": github_app_id,
+    }
+
+    encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256")
+    installation_id = get_installation_id(encoded_jwt)
+    access_token = get_access_token(encoded_jwt, installation_id)
+    runner_registration_token = get_runner_registration_token(access_token)
+
+    if push_to_ssm:
+        import boto3
+
+        print("Trying to put params into ssm manager")
+        client = boto3.client('ssm')
+        client.put_parameter(
+            Name=ssm_parameter_name,
+            Value=runner_registration_token,
+            Type='SecureString',
+            Overwrite=True)
+    else:
+        print("Not pushing token to AWS Parameter Store, just printing it:", runner_registration_token)
+
+
+def handler(event, context):
+    private_key, app_id = get_key_and_app_from_aws()
+    main(private_key, app_id, True, 'github_runner_registration_token')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Get new token from github to add runners')
+    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
+    parser.add_argument('-k', '--private-key', help='Private key')
+    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
+    parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store')
+    parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS parameter store parameter name')
+
+    args = parser.parse_args()
+
+    if not args.private_key_path and 
not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/lambda/requirements.txt b/tests/ci/lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From d70ea95b3dfd043ecdc09d2e7c1d9a5edfa16d37 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 16:28:25 +0300 Subject: [PATCH 129/238] First worker version --- tests/ci/init_worker.sh | 37 ------------------------------------- tests/ci/worker/init.sh | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 37 deletions(-) delete mode 100644 tests/ci/init_worker.sh create mode 100644 tests/ci/worker/init.sh diff --git a/tests/ci/init_worker.sh b/tests/ci/init_worker.sh deleted file mode 100644 index 44cfc89f758..00000000000 --- a/tests/ci/init_worker.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.283.1 -export RUNNER_HOME=/home/ubuntu/actions-runner - -apt-get update - -apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg \ - lsb-release \ - python3-pip - -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg - -echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null - -apt-get update - -apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io - -usermod -aG docker ubuntu - -pip install boto3 pygithub requests urllib3 unidiff - -mkdir -p $RUNNER_HOME && cd $RUNNER_HOME - -curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz - -tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz -rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz - -./bin/installdependencies.sh diff --git a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh new file mode 100644 index 00000000000..69432a0c220 --- /dev/null +++ b/tests/ci/worker/init.sh @@ -0,0 +1,18 @@ +#!/usr/bin/bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name `hostname -f` --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh From fca5775fac7e7ea878d67334734b790b0f6056b0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 18:40:06 +0300 Subject: [PATCH 130/238] Disable PVS check --- .github/workflows/main.yml | 44 +++++++++++++++++++------------------- 1 file 
changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2fb0e54a8ee..05ed78d8c07 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,25 +54,25 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - PVS-Check: - needs: DockerHubPush - runs-on: [self-hosted] - steps: - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/pvs_check - - name: Check out repository code - uses: actions/checkout@v2 - with: - submodules: 'recursive' - - name: PVS Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} - TEMP_PATH: ${{runner.temp}}/pvs_check - REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse - run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py +# PVS-Check: +# needs: DockerHubPush +# runs-on: [self-hosted] +# steps: +# - name: Download changed images +# uses: actions/download-artifact@v2 +# with: +# name: changed_images +# path: ${{ runner.temp }}/pvs_check +# - name: Check out repository code +# uses: actions/checkout@v2 +# with: +# submodules: 'recursive' +# - name: PVS Check +# env: +# YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} +# YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} +# TEMP_PATH: ${{runner.temp}}/pvs_check +# REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse +# run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py From 070a7cf727c5c20bb63c9ceff2532e6309053886 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 19:08:41 +0300 Subject: [PATCH 131/238] Bump From 95c3eb377bab413ac2d9238d7726898dedf33ee2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 10:47:15 +0300 Subject: [PATCH 132/238] Add finish check --- .github/workflows/main.yml | 12 ++++++++++- tests/ci/finish_check.py | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/ci/finish_check.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 05ed78d8c07..3931bc1538d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,7 +37,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/docker_images_check/changed_images.json - Style-Check: + StyleCheck: needs: DockerHubPush runs-on: [self-hosted] steps: @@ -76,3 +76,13 @@ jobs: # TEMP_PATH: ${{runner.temp}}/pvs_check # REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse # run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py + Finish-Check: + needs: [Style-Check, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py new file mode 100644 
index 00000000000..b481c5b658c
--- /dev/null
+++ b/tests/ci/finish_check.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+import logging
+from github import Github
+import os
+
+NAME = 'Run Check (actions)'
+
+def filter_statuses(statuses):
+    """
+    Squash statuses to latest state
+    1. context="first", state="success", update_time=1
+    2. context="second", state="success", update_time=2
+    3. context="first", state="failure", update_time=3
+    =========>
+    1. context="second", state="success"
+    2. context="first", state="failure"
+    """
+    filt = {}
+    for status in sorted(statuses, key=lambda x: x.updated_at):
+        filt[status.context] = status
+    return filt
+
+
+def get_commit(gh, commit_sha):
+    repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
+    commit = repo.get_commit(commit_sha)
+    return commit
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
+        event = json.load(event_file)
+
+    pr_info = PRInfo(event, need_orgs=True)
+    gh = Github(os.getenv("GITHUB_TOKEN"))
+    commit = get_commit(gh, pr_info.sha)
+
+    url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
+    statuses = filter_statuses(list(commit.get_statuses()))
+    if NAME in statuses and statuses[NAME].state == "pending":
+        commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
From cb81189bf91a1d5fd7448d4df3ca66f51fb976e2 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 29 Sep 2021 10:49:25 +0300
Subject: [PATCH 133/238] Fix workflow

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3931bc1538d..8cb771a0d45 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -76,8 +76,8 @@ jobs:
 #          TEMP_PATH: ${{runner.temp}}/pvs_check
 #          REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse
 #        run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py
-  Finish-Check:
-    needs: [Style-Check, DockerHubPush, CheckLabels]
+  FinishCheck:
+    needs: [StyleCheck, DockerHubPush, CheckLabels]
     runs-on: [self-hosted]
     steps:
       - name: Check out repository code
From bf9ebf42112577018347975ba9d9ec023eb2bf7b Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 29 Sep 2021 10:55:12 +0300
Subject: [PATCH 134/238] Import json

---
 tests/ci/finish_check.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py
index b481c5b658c..1b022905cda 100644
--- a/tests/ci/finish_check.py
+++ b/tests/ci/finish_check.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import logging
 from github import Github
+import json
 import os

 NAME = 'Run Check (actions)'
From 8d29a472fa088468584e370d0124ffb1e8f36175 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 29 Sep 2021 11:02:36 +0300
Subject: [PATCH 135/238] Fix

---
 tests/ci/finish_check.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py
index 1b022905cda..89139468fd6 100644
--- a/tests/ci/finish_check.py
+++ b/tests/ci/finish_check.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import logging
 from github import Github
+from pr_info import PRInfo
 import json
 import os
From 112a009b918b548d4b6d5a21caf8a857d5de46f6 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 29 Sep 2021 14:40:20 +0300
Subject: [PATCH 136/238] Update worker script

---
 tests/ci/{lambda => token_lambda}/Dockerfile 
| 0 tests/ci/{lambda => token_lambda}/app.py | 0 tests/ci/{lambda => token_lambda}/requirements.txt | 0 tests/ci/worker/init.sh | 4 +++- 4 files changed, 3 insertions(+), 1 deletion(-) rename tests/ci/{lambda => token_lambda}/Dockerfile (100%) rename tests/ci/{lambda => token_lambda}/app.py (100%) rename tests/ci/{lambda => token_lambda}/requirements.txt (100%) diff --git a/tests/ci/lambda/Dockerfile b/tests/ci/token_lambda/Dockerfile similarity index 100% rename from tests/ci/lambda/Dockerfile rename to tests/ci/token_lambda/Dockerfile diff --git a/tests/ci/lambda/app.py b/tests/ci/token_lambda/app.py similarity index 100% rename from tests/ci/lambda/app.py rename to tests/ci/token_lambda/app.py diff --git a/tests/ci/lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt similarity index 100% rename from tests/ci/lambda/requirements.txt rename to tests/ci/token_lambda/requirements.txt diff --git a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh index 69432a0c220..2f6638f14b5 100644 --- a/tests/ci/worker/init.sh +++ b/tests/ci/worker/init.sh @@ -8,11 +8,13 @@ export RUNNER_HOME=/home/ubuntu/actions-runner echo "Receiving token" export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` cd $RUNNER_HOME echo "Going to configure runner" -sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name `hostname -f` --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work echo "Run" sudo -u ubuntu ./run.sh From 0085e5653a81b6689dc1e1977b4ed421f36279a5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 12:00:45 +0300 Subject: [PATCH 137/238] Metrics lambda --- tests/ci/metrics_lambda/Dockerfile | 13 +++ tests/ci/metrics_lambda/app.py | 138 +++++++++++++++++++++++ tests/ci/metrics_lambda/requirements.txt | 3 + 3 files changed, 154 insertions(+) create mode 100644 tests/ci/metrics_lambda/Dockerfile create mode 100644 tests/ci/metrics_lambda/app.py create mode 100644 tests/ci/metrics_lambda/requirements.txt diff --git a/tests/ci/metrics_lambda/Dockerfile b/tests/ci/metrics_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/metrics_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
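+# (The public.ecr.aws/lambda base images bundle the Runtime Interface
+# Emulator, so the function can be smoke-tested locally before deploying;
+# the image tag here is assumed for illustration:
+#   docker run -p 9000:8080 metrics-lambda:latest
+#   curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{}'
+# )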
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py new file mode 100644 index 00000000000..8002e060dd0 --- /dev/null +++ b/tests/ci/metrics_lambda/app.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key_1" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_runners / total_active_runners * 100, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + +def main(github_secret_key, github_app_id, push_to_cloudwatch): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = 
jwt.encode(payload, github_secret_key, algorithm="RS256")
+    installation_id = get_installation_id(encoded_jwt)
+    access_token = get_access_token(encoded_jwt, installation_id)
+    runners = list_runners(access_token)
+    if push_to_cloudwatch:
+        push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
+    else:
+        print(runners)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Get list of runners and their states')
+    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
+    parser.add_argument('-k', '--private-key', help='Private key')
+    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
+    parser.add_argument('--push-to-cloudwatch', action='store_true', help='Push collected runner metrics to CloudWatch')
+
+    args = parser.parse_args()
+
+    if not args.private_key_path and not args.private_key:
+        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+
+    if args.private_key_path and args.private_key:
+        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+
+    if args.private_key:
+        private_key = args.private_key
+    else:
+        with open(args.private_key_path, 'r') as key_file:
+            private_key = key_file.read()
+
+    main(private_key, args.app_id, args.push_to_cloudwatch)
diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt
new file mode 100644
index 00000000000..c0dcf4a4dde
--- /dev/null
+++ b/tests/ci/metrics_lambda/requirements.txt
@@ -0,0 +1,3 @@
+requests
+PyJWT
+cryptography
From 7d028c3a90aacf1ae3817ea87597036a512ec11a Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 30 Sep 2021 13:12:58 +0300
Subject: [PATCH 138/238] Add termination lambda

---
 tests/ci/metrics_lambda/app.py               |   7 +-
 tests/ci/termination_lambda/Dockerfile       |  13 ++
 tests/ci/termination_lambda/app.py           | 230 +++++++++++++++++++
 tests/ci/termination_lambda/requirements.txt |   3 +
 4 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 tests/ci/termination_lambda/Dockerfile
 create mode 100644 tests/ci/termination_lambda/app.py
 create mode 100644 tests/ci/termination_lambda/requirements.txt

diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py
index 8002e060dd0..6c6fc594847 100644
--- a/tests/ci/metrics_lambda/app.py
+++ b/tests/ci/metrics_lambda/app.py
@@ -89,9 +89,14 @@ def push_metrics_to_cloudwatch(listed_runners, namespace):
         'Value': total_runners,
         'Unit': 'Count',
     })
+    if total_active_runners == 0:
+        busy_ratio = 100
+    else:
+        busy_ratio = busy_runners / total_active_runners * 100
+
     metrics_data.append({
         'MetricName': 'BusyRunnersRatio',
-        'Value': busy_runners / total_active_runners * 100,
+        'Value': busy_ratio,
         'Unit': 'Percent',
     })

diff --git a/tests/ci/termination_lambda/Dockerfile b/tests/ci/termination_lambda/Dockerfile
new file mode 100644
index 00000000000..f53be71a893
--- /dev/null
+++ b/tests/ci/termination_lambda/Dockerfile
@@ -0,0 +1,13 @@
+FROM public.ecr.aws/lambda/python:3.9
+
+# Copy function code
+COPY app.py ${LAMBDA_TASK_ROOT}
+
+# Install the function's dependencies using file requirements.txt
+# from your project folder.
+
+COPY requirements.txt . 
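+# (The "app.handler" CMD at the end of this file resolves as module.function:
+# the runtime imports app.py and invokes handler(event, context). A Lambda
+# ImageConfig could override it per function if one image were to serve
+# several handlers; that setup is an assumption, not something these
+# commits configure.)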
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py new file mode 100644 index 00000000000..414ad0a0d0f --- /dev/null +++ b/tests/ci/termination_lambda/app.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key_1" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def how_many_instances_to_kill(event_data): + data_array = event_data['CapacityToTerminate'] + to_kill_by_zone = {} + for av_zone in data_array: + zone_name = av_zone['AvailabilityZone'] + to_kill = av_zone['Capacity'] + if zone_name not in to_kill_by_zone: + to_kill_by_zone[zone_name] = 0 + + to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone + +def get_candidates_to_be_killed(event_data): + data_array = event_data['Instances'] + instances_by_zone = {} + for instance in data_array: + zone_name = instance['AvailabilityZone'] + instance_id = instance['InstanceId'] + if zone_name not in instances_by_zone: + instances_by_zone[zone_name] = [] + instances_by_zone[zone_name].append(instance_id) + + return instances_by_zone + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + + +def 
main(github_secret_key, github_app_id, event):
+    print("Got event", json.dumps(event, sort_keys=True, indent=4))
+    to_kill_by_zone = how_many_instances_to_kill(event)
+    instances_by_zone = get_candidates_to_be_killed(event)
+
+    payload = {
+        "iat": int(time.time()) - 60,
+        "exp": int(time.time()) + (10 * 60),
+        "iss": github_app_id,
+    }
+
+    encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256")
+    installation_id = get_installation_id(encoded_jwt)
+    access_token = get_access_token(encoded_jwt, installation_id)
+
+    runners = list_runners(access_token)
+
+    to_delete_runners = []
+    instances_to_kill = []
+    for zone in to_kill_by_zone:
+        num_to_kill = to_kill_by_zone[zone]
+        candidates = instances_by_zone[zone]
+        if num_to_kill > len(candidates):
+            raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}")
+
+        delete_for_av = []
+        for candidate in candidates:
+            if len(delete_for_av) == num_to_kill:
+                break
+            for runner in runners:
+                if runner.name == candidate:
+                    if not runner.busy:
+                        print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}")
+                        delete_for_av.append(runner)
+                    else:
+                        print(f"Runner {runner.name} is busy, not going to delete it")
+                    break
+            else:
+                print(f"Candidate {candidate} was not in runners list, simply delete it")
+                instances_to_kill.append(candidate)
+
+        if len(delete_for_av) < num_to_kill:
+            print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}")
+        to_delete_runners += delete_for_av
+
+    print("Got instances to kill: ", ', '.join(instances_to_kill))
+    print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners]))
+    for runner in to_delete_runners:
+        if delete_runner(access_token, runner):
+            print(f"Runner {runner.name} successfully deleted from github")
+            instances_to_kill.append(runner.name)
+        else:
+            print(f"Cannot delete {runner.name} from github")
+
+    response = {
+        "InstanceIDs": instances_to_kill
+    }
+    print(response)
+    return response
+
+def handler(event, context):
+    private_key, app_id = get_key_and_app_from_aws()
+    return main(private_key, app_id, event)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Get list of runners and their states')
+    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
+    parser.add_argument('-k', '--private-key', help='Private key')
+    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
+
+    args = parser.parse_args()
+
+    if not args.private_key_path and not args.private_key:
+        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+
+    if args.private_key_path and args.private_key:
+        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+
+    if args.private_key:
+        private_key = args.private_key
+    else:
+        with open(args.private_key_path, 'r') as key_file:
+            private_key = key_file.read()
+
+    sample_event = {
+        "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg",
+        "AutoScalingGroupName": "my-asg",
+        "CapacityToTerminate": [
+            {
+                "AvailabilityZone": "us-east-1b",
+                "Capacity": 1,
+                "InstanceMarketOption": "OnDemand"
+            },
+            {
+                "AvailabilityZone": "us-east-1c",
+                "Capacity": 2,
+                "InstanceMarketOption": "OnDemand"
+            }
+        ],
+        "Instances": [
+            {
+                "AvailabilityZone": "us-east-1b",
+                "InstanceId": 
"i-08d0b3c1a137e02a5", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + ], + "Cause": "SCALE_IN" + } + + main(private_key, args.app_id, sample_event) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/termination_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From 8a9556dd9367544e0b6185e5ae71babf987eaa7f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 13:39:15 +0300 Subject: [PATCH 139/238] Update termination lambda --- tests/ci/termination_lambda/app.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 414ad0a0d0f..261403dd8be 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -63,6 +63,42 @@ def list_runners(access_token): result.append(desc) return result +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + + def how_many_instances_to_kill(event_data): data_array = event_data['CapacityToTerminate'] to_kill_by_zone = {} @@ -153,6 +189,10 @@ def main(github_secret_key, github_app_id, event): else: print(f"Cannot delete {runner.name} from github") + # push metrics + runners = list_runners(access_token) + push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + response = { "InstanceIDs": instances_to_kill } From f2837569f57712a3d6edd849748ba4028bd4f4c3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 14:26:46 +0300 Subject: [PATCH 140/238] Fixes in termination lambda --- tests/ci/run_check.py | 5 ++++- tests/ci/termination_lambda/app.py | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 70b3ae2ac07..95e827671ca 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -86,7 +86,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # can be skipped entirely. 
def should_run_checks_for_pr(pr_info): # Consider the labels and whether the user is trusted. - force_labels = set(['force tests', 'release']).intersection(pr_info.labels) + force_labels = set(['force tests']).intersection(pr_info.labels) if force_labels: return True, "Labeled '{}'".format(', '.join(force_labels)) @@ -96,6 +96,9 @@ def should_run_checks_for_pr(pr_info): if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): return False, "Needs 'can be tested' label" + if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + return False, "Don't try new checks for release/backports/cherry-picks" + return True, "No special conditions apply" def get_commit(gh, commit_sha): diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 261403dd8be..7fd7c400db9 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -162,8 +162,16 @@ def main(github_secret_key, github_app_id, event): delete_for_av = [] for candidate in candidates: - if len(delete_for_av) == num_to_kill: + if candidate not in set([runner.name for runner in runners]): + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + for candidate in candidates: + if len(delete_for_av) + len(instances_to_kill) == num_to_kill: break + if candidate in instances_to_kill: + continue + for runner in runners: if runner.name == candidate: if not runner.busy: @@ -172,9 +180,6 @@ def main(github_secret_key, github_app_id, event): else: print(f"Runner {runner.name} is busy, not going to delete it") break - else: - print(f"Candidate {candidate} was not in runners list, simply delete it") - instances_to_kill.append(candidate) if len(delete_for_av) < num_to_kill: print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") @@ -189,9 +194,9 @@ def main(github_secret_key, github_app_id, event): else: print(f"Cannot delete {runner.name} from github") - # push metrics - runners = list_runners(access_token) - push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + ## push metrics + #runners = list_runners(access_token) + #push_metrics_to_cloudwatch(runners, 'RunnersMetrics') response = { "InstanceIDs": instances_to_kill @@ -262,7 +267,7 @@ if __name__ == "__main__": "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", "InstanceType": "t2.nano", "InstanceMarketOption": "OnDemand" - }, + } ], "Cause": "SCALE_IN" } From 7d92ad66149daac84601a69cde38b03f78668db0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 15:09:06 +0300 Subject: [PATCH 141/238] Remove PVS check --- .github/workflows/main.yml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8cb771a0d45..49760995dfc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,28 +54,6 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py -# PVS-Check: -# needs: DockerHubPush -# runs-on: [self-hosted] -# steps: -# - name: Download changed images -# uses: actions/download-artifact@v2 -# with: -# name: changed_images -# path: ${{ runner.temp }}/pvs_check -# - name: Check out repository code -# uses: actions/checkout@v2 -# with: -# submodules: 'recursive' -# - name: 
PVS Check -# env: -# YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} -# YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} -# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} -# TEMP_PATH: ${{runner.temp}}/pvs_check -# REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse -# run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] runs-on: [self-hosted] From 2aa852388fa2e372326603ee78ec4ab04ee72e72 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 15:34:44 +0300 Subject: [PATCH 142/238] Fix style check --- utils/check-style/check-style | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index efdc5f488d2..b2334a8b203 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -162,7 +162,7 @@ find $ROOT_PATH -name '.gitmodules' | while read i; do grep -F 'url = ' $i | gre find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" # There shouldn't be any docker containers outside docker directory -find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" +find $ROOT_PATH -not -path $ROOT_PATH'/tests/ci*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" # There shouldn't be any docker compose files outside docker directory #find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name '*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:" From 1df70af14e2de27405f6cfdacc651567b6140684 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 16:08:12 +0300 Subject: [PATCH 143/238] Fix style check one more time: --- utils/check-style/check-style | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index b2334a8b203..dc954411918 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | From 7a27ce7242abd679650b227051f7598bda854ecd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 16:20:08 +0300 Subject: [PATCH 144/238] Pull new image each time --- tests/ci/style_check.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 2af8514fbfc..71978379099 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -121,6 +121,15 @@ if __name__ == "__main__": docker_image += ':' + 
images['clickhouse/style-test']
     logging.info("Got docker image %s", docker_image)
+    for i in range(10):
+        try:
+            subprocess.check_output(f"docker pull {docker_image}", shell=True)
+            break
+        except Exception as ex:
+            time.sleep(i * 3)
+            logging.info("Got exception pulling docker %s", ex)
+    else:
+        raise Exception(f"Cannot pull dockerhub for image {docker_image}")

     if not aws_secret_key_id or not aws_secret_key:
         logging.info("No secrets, will not upload anything to S3")
From 0bf597374fe239af7da624bbf09c54d9111f9fbf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 3 Oct 2021 05:56:32 +0300
Subject: [PATCH 145/238] More enhancements for query obfuscator

---
 programs/format/Format.cpp       |  5 +++++
 src/IO/ReadHelpers.h             | 16 ++++++++++++----
 src/Parsers/obfuscateQueries.cpp | 11 +++++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp
index 984543a6c6b..4b0e8ad1ca1 100644
--- a/programs/format/Format.cpp
+++ b/programs/format/Format.cpp
@@ -25,6 +25,8 @@
 #include
 #include
 #include
+#include
+#include

 #pragma GCC diagnostic ignored "-Wunused-function"
@@ -114,6 +116,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
         registerAggregateFunctions();
         registerTableFunctions();
         registerStorages();
+        registerFormats();

         std::unordered_set additional_names;
@@ -130,6 +133,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
             return FunctionFactory::instance().has(what)
                 || AggregateFunctionFactory::instance().isAggregateFunctionName(what)
                 || TableFunctionFactory::instance().isTableFunctionName(what)
+                || FormatFactory::instance().isOutputFormat(what)
+                || FormatFactory::instance().isInputFormat(what)
                 || additional_names.count(what);
         };

diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index ca6affbf907..bfb30e8b95c 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -276,29 +276,37 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
         {
             case '+':
             {
-                if (has_sign || has_number)
+                if (has_sign)
                 {
+                    if (has_number)
+                        return ReturnType(true);
+
                     if constexpr (throw_exception)
                         throw ParsingException(
-                            "Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
+                            "Cannot parse number with multiple sign (+/-) characters",
                             ErrorCodes::CANNOT_PARSE_NUMBER);
                     else
                         return ReturnType(false);
                 }
+
                 has_sign = true;
                 break;
             }
             case '-':
             {
-                if (has_sign || has_number)
+                if (has_sign)
                 {
+                    if (has_number)
+                        return ReturnType(true);
+
                     if constexpr (throw_exception)
                         throw ParsingException(
-                            "Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
+                            "Cannot parse number with multiple sign (+/-) characters",
                             ErrorCodes::CANNOT_PARSE_NUMBER);
                     else
                         return ReturnType(false);
                 }
+
                 if constexpr (is_signed_v)
                     negative = true;
                 else
diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp
index eb0bf5281c9..c0b57d9b1f5 100644
--- a/src/Parsers/obfuscateQueries.cpp
+++ b/src/Parsers/obfuscateQueries.cpp
@@ -38,7 +38,8 @@ const std::unordered_set keywords
     "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
     "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
     "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
-    "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY"
+    "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET",
+    "TRIM", "LTRIM", "RTRIM", "BOTH", 
"LEADING", "TRAILING" }; const std::unordered_set keep_words @@ -906,7 +907,13 @@ void obfuscateQueries( /// Write quotes and the obfuscated content inside. result.write(*token.begin); - obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + + /// If it is long, just replace it with hash. Long identifiers in queries are usually auto-generated. + if (token.size() > 32) + writeIntText(sipHash64(token.begin + 1, token.size() - 2), result); + else + obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + result.write(token.end[-1]); } else if (token.type == TokenType::Number) From ece880184b4b6bfe48a7428cefe26e15953e20f0 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 20:26:48 +0400 Subject: [PATCH 146/238] Reorganiza contrib IDE folders --- cmake/find/capnp.cmake | 2 - cmake/find/cxx.cmake | 2 - cmake/find/unwind.cmake | 1 - contrib/CMakeLists.txt | 93 ++++++++++++++++++++++++++++++++++++----- 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake index ee4735bd175..25dfce24ae9 100644 --- a/cmake/find/capnp.cmake +++ b/cmake/find/capnp.cmake @@ -34,8 +34,6 @@ endif() if (CAPNP_LIBRARIES) set (USE_CAPNP 1) elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY) - add_subdirectory(contrib/capnproto-cmake) - set (CAPNP_LIBRARIES capnpc) set (USE_CAPNP 1) set (USE_INTERNAL_CAPNP_LIBRARY 1) diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b1da125e219..b96ba1e1b65 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -50,8 +50,6 @@ endif () if (NOT HAVE_LIBCXX AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) set (LIBCXX_LIBRARY cxx) set (LIBCXXABI_LIBRARY cxxabi) - add_subdirectory(contrib/libcxxabi-cmake) - add_subdirectory(contrib/libcxx-cmake) # Exception handling library is embedded into libcxxabi. diff --git a/cmake/find/unwind.cmake b/cmake/find/unwind.cmake index c9f5f30a5d6..9ae23ae23c7 100644 --- a/cmake/find/unwind.cmake +++ b/cmake/find/unwind.cmake @@ -1,7 +1,6 @@ option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) set (UNWIND_LIBRARIES unwind) set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 140cc0846ec..2c0ddbc8384 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,16 +1,5 @@ # Third-party libraries may have substandard code. -# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. -# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will -# appear not in "contrib/" as originally planned here. 
-get_filename_component (_current_dir_name "${CMAKE_CURRENT_LIST_DIR}" NAME) -if (CMAKE_FOLDER) - set (CMAKE_FOLDER "${CMAKE_FOLDER}/${_current_dir_name}") -else () - set (CMAKE_FOLDER "${_current_dir_name}") -endif () -unset (_current_dir_name) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") @@ -49,6 +38,19 @@ add_subdirectory (replxx-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (nanodbc-cmake) +if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) + add_subdirectory(libcxxabi-cmake) + add_subdirectory(libcxx-cmake) +endif () + +if (USE_UNWIND) + add_subdirectory(libunwind-cmake) +endif () + +if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY) + add_subdirectory(capnproto-cmake) +endif () + if (USE_YAML_CPP) add_subdirectory (yaml-cpp-cmake) endif() @@ -347,3 +349,72 @@ endif() if (USE_S2_GEOMETRY) add_subdirectory(s2geometry-cmake) endif() + +# Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. +# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear +# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, +# instead of controlling it via CMAKE_FOLDER. + +function (ensure_target_rooted_in _target _folder) + # Read the original FOLDER property value, if any. + get_target_property (_folder_prop "${_target}" FOLDER) + + # Normalize that value, so we avoid possible repetitions in folder names. + + if (NOT _folder_prop) + set (_folder_prop "") + endif () + + if (CMAKE_FOLDER AND _folder_prop MATCHES "^${CMAKE_FOLDER}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder AND _folder_prop MATCHES "^${_folder}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder) + set (_folder_prop "${_folder}/${_folder_prop}") + endif () + + if (CMAKE_FOLDER) + set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") + endif () + + message (STATUS "${_target} goes under ${_folder_prop}") + + # Set the updated FOLDER property value back. + set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") +endfunction () + +function (ensure_own_targets_are_rooted_in _dir _folder) + get_directory_property (_targets DIRECTORY "${_dir}" BUILDSYSTEM_TARGETS) + foreach (_target IN LISTS _targets) + ensure_target_rooted_in ("${_target}" "${_folder}") + endforeach () +endfunction () + +function (ensure_all_targets_are_rooted_in _dir _folder) + ensure_own_targets_are_rooted_in ("${_dir}" "${_folder}") + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_folder}") + endforeach () +endfunction () + +function (organize_ide_folders_2_level _dir) + get_filename_component (_dir_name "${_dir}" NAME) + ensure_own_targets_are_rooted_in ("${_dir}" "${_dir_name}") + + # Note, that we respect only first two levels of nesting, we don't want to + # reorganize target folders further within each third-party dir. 
+ + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + get_filename_component (_sub_dir_name "${_sub_dir}" NAME) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_dir_name}/${_sub_dir_name}") + endforeach () +endfunction () + +organize_ide_folders_2_level ("${CMAKE_CURRENT_LIST_DIR}") From 365a6b469e069a174c9c9924e1eee9795bf72bfd Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 20:30:20 +0400 Subject: [PATCH 147/238] Remove debug message --- contrib/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2c0ddbc8384..cc5a6dbc9b7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -381,8 +381,6 @@ function (ensure_target_rooted_in _target _folder) set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") endif () - message (STATUS "${_target} goes under ${_folder_prop}") - # Set the updated FOLDER property value back. set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") endfunction () From 7be521b024acf1eff98fb3075a376f60a9d667bf Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 21:07:44 +0400 Subject: [PATCH 148/238] Do not manipulate FOLDER property on INTERFACE library targets --- contrib/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cc5a6dbc9b7..c671369d126 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -356,6 +356,12 @@ endif() # instead of controlling it via CMAKE_FOLDER. function (ensure_target_rooted_in _target _folder) + # Skip INTERFACE library targets, since FOLDER property is not available for them. + get_target_property (_target_type "${_target}" TYPE) + if (_target_type STREQUAL "INTERFACE_LIBRARY") + return () + endif () + # Read the original FOLDER property value, if any. get_target_property (_folder_prop "${_target}" FOLDER) From 9c7bef4c9d9062a48820233cfed132c2e06d5d7f Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Oct 2021 17:08:47 +0300 Subject: [PATCH 149/238] Remove unused headers and handle exception 'unrecognised option' in clickhouse-local --- programs/local/LocalServer.cpp | 14 +++++--------- ...96_unknown_option_in_clickhouse_local.reference | 1 + .../02096_unknown_option_in_clickhouse_local.sh | 9 +++++++++ 3 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference create mode 100755 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8066650006a..2035406d73a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,8 +1,6 @@ #include "LocalServer.h" #include -#include -#include #include #include #include @@ -10,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -21,19 +18,14 @@ #include #include #include -#include #include -#include -#include #include #include #include #include #include -#include #include #include -#include #include #include #include @@ -45,7 +37,6 @@ #include #include #include -#include #include #include @@ -722,6 +713,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference new file mode 100644 index 00000000000..96feda5dd3c --- /dev/null +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference @@ -0,0 +1 @@ +Bad arguments: unrecognised option '--unknown-option' diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh new file mode 100755 index 00000000000..ee0e3f3d149 --- /dev/null +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2206 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo + From 0ad20e661329525c5385de4a43d0a0af94544a0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Oct 2021 03:14:44 +0300 Subject: [PATCH 150/238] Fix test --- src/IO/ReadHelpers.h | 14 ++++++++------ tests/queries/0_stateless/01888_read_int_safe.sql | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index bfb30e8b95c..5077ee2df21 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -276,11 +276,13 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { case '+': { + /// 123+ or +123+, just stop after 123 or +123. + if (has_number) + return ReturnType(true); + + /// No digits read yet, but we already read sign, like ++, -+. if (has_sign) { - if (has_number) - return ReturnType(true); - if constexpr (throw_exception) throw ParsingException( "Cannot parse number with multiple sign (+/-) characters", @@ -294,11 +296,11 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) } case '-': { + if (has_number) + return ReturnType(true); + if (has_sign) { - if (has_number) - return ReturnType(true); - if constexpr (throw_exception) throw ParsingException( "Cannot parse number with multiple sign (+/-) characters", diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3caa4878aba..3aea8e38ab0 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ -3,8 +3,8 @@ select toInt64('+-1'); -- { serverError 72; } select toInt64('++1'); -- { serverError 72; } select toInt64('++'); -- { serverError 72; } select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 72; } -select toInt64('1-1'); -- { serverError 72; } +select toInt64('1+1'); -- { serverError 6; } +select toInt64('1-1'); -- { serverError 6; } select toInt64(''); -- { serverError 32; } select toInt64('1'); select toInt64('-1'); From 798d8c7c1772eec4ca679c0f7e42eb5fc3d2b6e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Oct 2021 03:16:09 +0300 Subject: [PATCH 151/238] Fix test --- .../0_stateless/01425_decimal_parse_big_negative_exponent.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql index 7d0993c1bfc..7f276d1f8d4 100644 --- a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql +++ 
b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql @@ -1,4 +1,4 @@ -SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 72 } +SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 6 } SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '1E-9' AS x, toDecimal32(x, 0); SELECT '1E-8' AS x, toDecimal32(x, 0); From 8851cb8459fcb490acec6ce7af33a709b0b11539 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 13 Oct 2021 11:37:56 +0800 Subject: [PATCH 152/238] Use upstream replxx --- .gitmodules | 2 +- contrib/replxx | 2 +- src/Client/ClientBaseHelpers.cpp | 44 ++++++++++++++++---------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.gitmodules b/.gitmodules index 74d1049ce01..f9758a69956 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,7 +140,7 @@ url = https://github.com/ClickHouse-Extras/libc-headers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/AmokHuginnsson/replxx.git [submodule "contrib/avro"] path = contrib/avro url = https://github.com/ClickHouse-Extras/avro.git diff --git a/contrib/replxx b/contrib/replxx index f97765df14f..89abeea7516 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a +Subproject commit 89abeea7516a2a9b6aad7bfecc132f608ff14a3d diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index a530e48ee35..e1c1481c5b4 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -109,29 +109,29 @@ void highlight(const String & query, std::vector & colors {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE}, - {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Comma, Replxx::Color::INTENSE}, - {TokenType::Semicolon, Replxx::Color::INTENSE}, - {TokenType::Dot, Replxx::Color::INTENSE}, - {TokenType::Asterisk, Replxx::Color::INTENSE}, + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, Replxx::Color::INTENSE}, - {TokenType::Minus, Replxx::Color::INTENSE}, - {TokenType::Slash, Replxx::Color::INTENSE}, - {TokenType::Percent, Replxx::Color::INTENSE}, - {TokenType::Arrow, Replxx::Color::INTENSE}, - {TokenType::QuestionMark, Replxx::Color::INTENSE}, - {TokenType::Colon, Replxx::Color::INTENSE}, - {TokenType::Equals, Replxx::Color::INTENSE}, - {TokenType::NotEquals, Replxx::Color::INTENSE}, - {TokenType::Less, Replxx::Color::INTENSE}, - {TokenType::Greater, Replxx::Color::INTENSE}, - {TokenType::LessOrEquals, Replxx::Color::INTENSE}, - {TokenType::GreaterOrEquals, Replxx::Color::INTENSE}, - {TokenType::Concatenation, Replxx::Color::INTENSE}, - {TokenType::At, Replxx::Color::INTENSE}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, 
replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::DoubleAt, Replxx::Color::MAGENTA}, {TokenType::EndOfStream, Replxx::Color::DEFAULT}, @@ -142,7 +142,7 @@ void highlight(const String & query, std::vector & colors {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }}; + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; const Replxx::Color unknown_token_color = Replxx::Color::RED; From 4d020c96e0fe2f1725caa6b40354cd7f8014bc4d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Oct 2021 15:07:50 +0300 Subject: [PATCH 153/238] support nullable arguments in function initializeAggregation --- src/Functions/initializeAggregation.cpp | 1 + .../02097_initializeAggregationNullable.reference | 6 ++++++ .../0_stateless/02097_initializeAggregationNullable.sql | 8 ++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.reference create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.sql diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index e8bd136e704..02db90bfc43 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -40,6 +40,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.reference b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference new file mode 100644 index 00000000000..6d2e42f2ca6 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference @@ -0,0 +1,6 @@ +1 +AggregateFunction(uniqExact, Nullable(String)) +1 +AggregateFunction(uniqExact, Nullable(UInt8)) +1 +1 diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.sql b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql new file mode 100644 index 00000000000..aa4e6d47579 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql @@ -0,0 +1,8 @@ +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable('foo'))); +SELECT 
toTypeName(initializeAggregation('uniqExactState', toNullable('foo'))); + +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable(123))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable(123))); + +SELECT initializeAggregation('uniqExactState', toNullable('foo')) = arrayReduce('uniqExactState', [toNullable('foo')]); +SELECT initializeAggregation('uniqExactState', toNullable(123)) = arrayReduce('uniqExactState', [toNullable(123)]); From a1a4df2501816529dcb9e6588acfc72b74b902bc Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 16:34:05 +0300 Subject: [PATCH 154/238] Fix handling exception 'unrecognised option' in clickhouse-local and client --- programs/client/Client.cpp | 10 +--------- programs/client/Client.h | 2 +- programs/local/LocalServer.cpp | 15 +++++---------- programs/local/LocalServer.h | 2 +- src/Client/ClientBase.cpp | 17 ++++++++++++++++- src/Client/ClientBase.h | 3 ++- ...unknown_option_in_clickhouse_local.reference | 3 ++- .../02096_unknown_option_in_clickhouse_local.sh | 2 +- 8 files changed, 29 insertions(+), 25 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index da910430985..d53a57b6eba 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -996,7 +996,7 @@ void Client::printHelpMessage(const OptionsDescription & options_description) } -void Client::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() @@ -1053,14 +1053,6 @@ void Client::addAndCheckOptions(OptionsDescription & options_description, po::va ( "types", po::value(), "types" ); - - cmd_settings.addProgramOptions(options_description.main_description.value()); - /// Parse main commandline options. 
- po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); - po::store(parsed, options); } diff --git a/programs/client/Client.h b/programs/client/Client.h index 43f6deae0b5..2def74ef3fc 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -24,7 +24,7 @@ protected: String getName() const override { return "client"; } void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) override; void processConfig() override; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2035406d73a..2180729438d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -636,7 +634,7 @@ void LocalServer::printHelpMessage(const OptionsDescription & options_descriptio } -void LocalServer::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void LocalServer::addOptions(OptionsDescription & options_description) { options_description.main_description->add_options() ("database,d", po::value(), "database") @@ -655,10 +653,6 @@ void LocalServer::addAndCheckOptions(OptionsDescription & options_description, p ("no-system-tables", "do not attach system tables (better startup time)") ; - - cmd_settings.addProgramOptions(options_description.main_description.value()); - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - po::store(parsed, options); } @@ -713,10 +707,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } - catch (const boost::program_options::error & e) + catch (const DB::Exception & e) { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + std::cerr << DB::getExceptionMessage(e, false) << std::endl; + auto code = DB::getCurrentExceptionCode(); + return code ? code : 1; } catch (...) 
{ diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index e14e18adced..ce0df06c86a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -40,7 +40,7 @@ protected: String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector &) override; void processConfig() override; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index baf082a3541..deb22ca60ef 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -72,6 +72,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; + extern const int UNRECOGNIZED_ARGUMENTS; } } @@ -1505,6 +1506,19 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume } } +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + cmd_settings.addProgramOptions(options_description.main_description.value()); + /// Parse main commandline options. + auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()); + parser.allow_unregistered(); + po::parsed_options parsed = parser.run(); + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + if (unrecognized_options.size() > 1) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); + po::store(parsed, options); +} + void ClientBase::init(int argc, char ** argv) { @@ -1562,7 +1576,8 @@ void ClientBase::init(int argc, char ** argv) ("stacktrace", "print stack traces of exceptions") ; - addAndCheckOptions(options_description, options, common_arguments); + addOptions(options_description); + parseAndCheckOptions(options_description, options, common_arguments); po::notify(options); if (options.count("version") || options.count("V")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 070b676366c..cfc0b45ff60 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -91,7 +91,7 @@ protected: }; virtual void printHelpMessage(const OptionsDescription & options_description) = 0; - virtual void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) = 0; + virtual void addOptions(OptionsDescription & options_description) = 0; virtual void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) = 0; @@ -132,6 +132,7 @@ private: void resetOutput(); void outputQueryInfo(bool echo_query_); void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector & external_tables_arguments); + void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. 
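
A minimal standalone sketch of the parsing pattern that parseAndCheckOptions() above relies on, assuming nothing beyond boost::program_options (the option set and exit handling here are illustrative, not the client's real ones): allow_unregistered() makes run() keep unknown tokens instead of throwing, and collect_unrecognized() hands them back so the caller can raise its own error, UNRECOGNIZED_ARGUMENTS in the patch. The patch reads the collected vector from index 1, presumably because the client's argument vector starts with a placeholder element; this sketch simply reports the first entry.

    #include <boost/program_options.hpp>
    #include <iostream>
    #include <string>

    namespace po = boost::program_options;

    int main(int argc, char ** argv)
    {
        po::options_description desc("Main options");
        desc.add_options()
            ("query,q", po::value<std::string>(), "query to execute");

        /// Unknown options no longer make run() throw; they are kept in `parsed`.
        po::parsed_options parsed = po::command_line_parser(argc, argv)
            .options(desc).allow_unregistered().run();

        /// include_positional also returns bare positional tokens, so a real
        /// client must skip the positionals it actually expects.
        auto unrecognized = po::collect_unrecognized(parsed.options, po::include_positional);
        if (!unrecognized.empty())
        {
            std::cerr << "Unrecognized option '" << unrecognized[0] << "'\n";
            return 1;
        }

        po::variables_map options;
        po::store(parsed, options);
        po::notify(options);
        return 0;
    }
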
diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference index 96feda5dd3c..2c4cf540812 100644 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference @@ -1 +1,2 @@ -Bad arguments: unrecognised option '--unknown-option' +Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) +Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh index ee0e3f3d149..2fabc761d4c 100755 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo - +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 echo From ba7b784646bc64619dc62d72c3d27e47e457949f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 16:36:32 +0300 Subject: [PATCH 155/238] Remove catching boost::program_options error in Client --- programs/client/Client.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d53a57b6eba..45314a5d460 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1230,11 +1230,6 @@ int mainEntryClickHouseClient(int argc, char ** argv) client.init(argc, argv); return client.run(); } - catch (const boost::program_options::error & e) - { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return 1; - } catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; From ce22f534c4e64d7c4fe13c3fb1353c76028aa4e7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Sep 2021 15:59:22 +0300 Subject: [PATCH 156/238] Add CapnProto output format, refactor CapnProto input format --- src/Common/ErrorCodes.cpp | 2 + src/Core/Settings.h | 3 +- src/Core/SettingsEnums.cpp | 5 + src/Core/SettingsEnums.h | 2 + src/DataTypes/EnumValues.cpp | 19 + src/DataTypes/EnumValues.h | 4 + src/Formats/CapnProtoUtils.cpp | 406 ++++++++++++++++ src/Formats/CapnProtoUtils.h | 43 ++ src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSchemaInfo.cpp | 6 + src/Formats/FormatSchemaInfo.h | 2 + src/Formats/FormatSettings.h | 14 + src/Formats/registerFormats.cpp | 2 + .../Formats/Impl/CapnProtoRowInputFormat.cpp | 433 +++++++++--------- .../Formats/Impl/CapnProtoRowInputFormat.h | 46 +- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 251 ++++++++++ .../Formats/Impl/CapnProtoRowOutputFormat.h | 53 +++ .../Formats/Impl/ProtobufRowInputFormat.cpp | 3 +- .../Formats/Impl/ProtobufRowOutputFormat.cpp | 4 +- .../0_stateless/02030_capnp_format.reference | 52 +++ .../queries/0_stateless/02030_capnp_format.sh | 109 +++++ .../format_schemas/02030_capnp_enum.capnp | 13 + .../02030_capnp_fake_nullable.capnp | 23 + .../format_schemas/02030_capnp_lists.capnp | 8 + .../02030_capnp_low_cardinality.capnp | 17 + .../02030_capnp_nested_lists_and_tuples.capnp | 36 ++ .../02030_capnp_nested_table.capnp | 20 + .../02030_capnp_nested_tuples.capnp | 23 + .../format_schemas/02030_capnp_nullable.capnp | 22 + .../02030_capnp_simple_types.capnp | 21 + 
.../format_schemas/02030_capnp_tuples.capnp | 35 ++ .../02030_capnp_unnamed_union.capnp | 10 + 32 files changed, 1416 insertions(+), 272 deletions(-) create mode 100644 src/Formats/CapnProtoUtils.cpp create mode 100644 src/Formats/CapnProtoUtils.h create mode 100644 src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h create mode 100644 tests/queries/0_stateless/02030_capnp_format.reference create mode 100755 tests/queries/0_stateless/02030_capnp_format.sh create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b6d9b65c28b..1aff1460125 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -589,6 +589,8 @@ M(619, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ M(620, QUERY_NOT_ALLOWED) \ M(621, CANNOT_NORMALIZE_STRING) \ + M(622, CANNOT_PARSE_CAPN_PROTO_SCHEMA) \ + M(623, CAPN_PROTO_BAD_CAST) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a5767955045..f91bf684c85 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -625,7 +625,8 @@ class IColumn; M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ - + \ + M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
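
The new format_capn_proto_enum_comparising_mode setting above decides how a ClickHouse Enum element is matched against a Cap'n Proto enumerant. A rough standalone sketch of what the three modes mean (an illustrative helper, not the patch's code; the real checkEnums() below compares the whole sets of names or values and also rejects Cap'n Proto ordinals that do not fit into the ClickHouse Enum's value range):

    #include <algorithm>
    #include <cctype>
    #include <string>

    enum class EnumComparingMode { BY_NAMES, BY_NAMES_CASE_INSENSITIVE, BY_VALUES };

    static std::string toLower(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return std::tolower(c); });
        return s;
    }

    /// Does a ClickHouse enum element (name, value) match a Cap'n Proto
    /// enumerant (capnp_name, capnp_ordinal) under the given mode?
    bool enumElementsMatch(const std::string & name, int value,
                           const std::string & capnp_name, unsigned capnp_ordinal,
                           EnumComparingMode mode)
    {
        switch (mode)
        {
            case EnumComparingMode::BY_NAMES:  /// values may differ
                return name == capnp_name;
            case EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
                return toLower(name) == toLower(capnp_name);
            case EnumComparingMode::BY_VALUES:  /// names may differ
                return value >= 0 && static_cast<unsigned>(value) == capnp_ordinal;
        }
        return false;
    }
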
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 8e588b62326..f5497588891 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -116,4 +116,9 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {{"enable", ShortCircuitFunctionEvaluation::ENABLE}, {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) + +IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS, + {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES}, + {"by_values", FormatSettings::EnumComparingMode::BY_VALUES}, + {"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a699da3062c..f57a064241e 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -168,4 +168,6 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode) + } diff --git a/src/DataTypes/EnumValues.cpp b/src/DataTypes/EnumValues.cpp index 6df899ba9a2..ab5ea0ca249 100644 --- a/src/DataTypes/EnumValues.cpp +++ b/src/DataTypes/EnumValues.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -82,6 +83,24 @@ Names EnumValues::getAllRegisteredNames() const return result; } +template +std::unordered_set EnumValues::getSetOfAllNames(bool to_lower) const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(to_lower ? boost::algorithm::to_lower_copy(value.first) : value.first); + return result; +} + +template +std::unordered_set EnumValues::getSetOfAllValues() const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(value.second); + return result; +} + template class EnumValues; template class EnumValues; diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 1e5e4f55ea7..17c292c5551 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -80,6 +80,10 @@ public: } Names getAllRegisteredNames() const override; + + std::unordered_set getSetOfAllNames(bool to_lower) const; + + std::unordered_set getSetOfAllValues() const; }; } diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp new file mode 100644 index 00000000000..9931785f43e --- /dev/null +++ b/src/Formats/CapnProtoUtils.cpp @@ -0,0 +1,406 @@ +#include + +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_CAPN_PROTO_SCHEMA; + extern const int THERE_IS_NO_COLUMN; + extern const int BAD_TYPE_OF_FIELD; + extern const int CAPN_PROTO_BAD_CAST; + extern const int FILE_DOESNT_EXIST; + extern const int UNKNOWN_EXCEPTION; +} + +capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) +{ + capnp::ParsedSchema schema; + try + { + int fd; + KJ_SYSCALL(fd = open(schema_info.schemaDirectory().data(), O_RDONLY)); + auto schema_dir = kj::newDiskDirectory(kj::OsFileHandle(fd)); + schema = impl.parseFromDirectory(*schema_dir, kj::Path::parse(schema_info.schemaPath()), {}); + } + catch (const kj::Exception & e) + { + /// That's not good to determine the type of error by its description, but + /// this is the only way to do it here, because kj doesn't specify the type of error. 
+        String description = String(e.getDescription().cStr());
+        if (description.starts_with("no such directory"))
+            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exist", schema_info.absoluteSchemaPath());
+
+        if (description.starts_with("Parse error"))
+            throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine());
+
+        throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProto schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath());
+    }
+
+    auto message_maybe = schema.findNested(schema_info.messageName());
+    auto * message_schema = kj::_::readMaybe(message_maybe);
+    if (!message_schema)
+        throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "CapnProto schema doesn't contain message with name {}", schema_info.messageName());
+    return message_schema->asStruct();
+}
+
+bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode)
+{
+    if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE)
+        return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second);
+    return first == second;
+}
+
+static const std::map capnp_simple_type_names =
+{
+    {capnp::schema::Type::Which::BOOL, "Bool"},
+    {capnp::schema::Type::Which::VOID, "Void"},
+    {capnp::schema::Type::Which::INT8, "Int8"},
+    {capnp::schema::Type::Which::INT16, "Int16"},
+    {capnp::schema::Type::Which::INT32, "Int32"},
+    {capnp::schema::Type::Which::INT64, "Int64"},
+    {capnp::schema::Type::Which::UINT8, "UInt8"},
+    {capnp::schema::Type::Which::UINT16, "UInt16"},
+    {capnp::schema::Type::Which::UINT32, "UInt32"},
+    {capnp::schema::Type::Which::UINT64, "UInt64"},
+    {capnp::schema::Type::Which::FLOAT32, "Float32"},
+    {capnp::schema::Type::Which::FLOAT64, "Float64"},
+    {capnp::schema::Type::Which::TEXT, "Text"},
+    {capnp::schema::Type::Which::DATA, "Data"},
+    {capnp::schema::Type::Which::ENUM, "Enum"},
+    {capnp::schema::Type::Which::INTERFACE, "Interface"},
+    {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"},
+};
+
+static bool checkIfStructContainsUnnamedUnion(const capnp::StructSchema & struct_schema)
+{
+    return struct_schema.getFields().size() != struct_schema.getNonUnionFields().size();
+}
+
+static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema)
+{
+    return struct_schema.getFields().size() == struct_schema.getUnionFields().size();
+}
+
+/// Get full name of type for better exception messages.
+static String getCapnProtoFullTypeName(const capnp::Type & type)
+{
+    if (type.isStruct())
+    {
+        auto struct_schema = type.asStruct();
+
+        auto non_union_fields = struct_schema.getNonUnionFields();
+        std::vector non_union_field_names;
+        for (auto nested_field : non_union_fields)
+            non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType()));
+
+        auto union_fields = struct_schema.getUnionFields();
+        std::vector union_field_names;
+        for (auto nested_field : union_fields)
+            union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType()));
+
+        String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")";
+        /// Check if the struct is a named union. 
+ if (non_union_field_names.empty()) + return union_name; + + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains unnamed union. + if (!union_field_names.empty()) + type_name += "," + union_name; + type_name += ")"; + return type_name; + } + + if (type.isList()) + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + + if (!capnp_simple_type_names.contains(type.which())) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + + return capnp_simple_type_names.at(type.which()); +} + +template +static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_type, FormatSettings::EnumComparingMode mode, UInt64 max_value, String & error_message) +{ + if (!capnp_type.isEnum()) + return false; + + auto enum_schema = capnp_type.asEnum(); + bool to_lower = mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE; + const auto * enum_type = assert_cast *>(column_type.get()); + const auto & enum_values = dynamic_cast &>(*enum_type); + + auto names = enum_values.getSetOfAllNames(to_lower); + auto values = enum_values.getSetOfAllValues(); + + std::unordered_set capn_enum_names; + std::unordered_set capn_enum_values; + + auto enumerants = enum_schema.getEnumerants(); + for (auto enumerant : enumerants) + { + String name = enumerant.getProto().getName(); + capn_enum_names.insert(to_lower ? boost::algorithm::to_lower_copy(name) : name); + auto value = enumerant.getOrdinal(); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES && value > max_value) + { + error_message += "Enum from CapnProto schema contains value that is out of range for Clickhouse Enum"; + return false; + } + capn_enum_values.insert(Type(value)); + } + + if (mode == FormatSettings::EnumComparingMode::BY_NAMES || mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) + { + auto result = names == capn_enum_names; + if (!result) + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; + return result; + } + + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); + +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + + /// Check that struct is a named union of type VOID and one arbitrary type. 
+ auto struct_schema = capnp_type.asStruct(); + if (!checkIfStructIsNamedUnion(struct_schema)) + return false; + + auto union_fields = struct_schema.getUnionFields(); + if (union_fields.size() != 2) + return false; + + auto first = union_fields[0]; + auto second = union_fields[1]; + + auto nested_type = assert_cast(data_type.get())->getNestedType(); + if (first.getType().isVoid()) + return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + if (second.getType().isVoid()) + return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return false; +} + +static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + auto struct_schema = capnp_type.asStruct(); + + if (checkIfStructIsNamedUnion(struct_schema)) + return false; + + if (checkIfStructContainsUnnamedUnion(struct_schema)) + { + error_message += "CapnProto struct contains unnamed union"; + return false; + } + + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + if (nested_types.size() != struct_schema.getFields().size()) + { + error_message += "Tuple and Struct types have different sizes"; + return false; + } + + if (!tuple_data_type->haveExplicitNames()) + { + error_message += "Only named Tuple can be converted to CapnProto Struct"; + return false; + } + for (const auto & name : tuple_data_type->getElementNames()) + { + KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) + { + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + return false; + } + else + { + error_message += "CapnProto struct doesn't contain a field with name " + name; + return false; + } + } + + return true; +} + +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isList()) + return false; + auto list_schema = capnp_type.asList(); + auto nested_type = assert_cast(data_type.get())->getNestedType(); + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + return capnp_type.isBool() || capnp_type.isUInt8(); + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + return capnp_type.isUInt16(); + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + return capnp_type.isUInt32(); + case TypeIndex::UInt64: + return capnp_type.isUInt64(); + case TypeIndex::Int8: + return capnp_type.isInt8(); + case TypeIndex::Int16: + return capnp_type.isInt16(); + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + return capnp_type.isInt32(); + case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Int64: + return capnp_type.isInt64(); + case TypeIndex::Float32: + return capnp_type.isFloat32(); + case TypeIndex::Float64: + return capnp_type.isFloat64(); + case TypeIndex::Enum8: + return checkEnums(capnp_type, data_type, mode, INT8_MAX, error_message); + case TypeIndex::Enum16: + return checkEnums(capnp_type, data_type, mode, INT16_MAX, error_message); + case TypeIndex::Tuple: + return checkTupleType(capnp_type, data_type, mode, 
error_message); + case TypeIndex::Nullable: + { + auto result = checkNullableType(capnp_type, data_type, mode, error_message); + if (!result) + error_message += "Nullable can be represented only as a named union of type Void and nested type"; + return result; + } + case TypeIndex::Array: + return checkArrayType(capnp_type, data_type, mode, error_message); + case TypeIndex::LowCardinality: + return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message); + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + return capnp_type.isText() || capnp_type.isData(); + default: + return false; + } +} + +static std::pair splitFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) + { + auto field_reader = struct_reader.get(*field); + if (nested_name.empty()) + return field_reader; + + if (field_reader.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getReaderByColumnName(field_reader.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) + { + if (nested_name.empty()) + return {struct_builder, *field}; + + auto field_builder = struct_builder.get(*field); + if (field_builder.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getStructBuilderAndFieldByColumnName(field_builder.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) + { + if (nested_name.empty()) + return *field; + + if (!field->getType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getFieldByName(field->getType().asStruct(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto schema doesn't contain field with name {}", field_name); +} + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode) +{ + /// Firstly check that struct doesn't contain unnamed union, because we don't support it. 
+ if (checkIfStructContainsUnnamedUnion(schema)) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Schema contains unnamed union that is not supported"); + auto names_and_types = header.getNamesAndTypesList(); + String additional_error_message; + for (auto & [name, type] : names_and_types) + { + auto field = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + { + auto e = Exception( + ErrorCodes::CAPN_PROTO_BAD_CAST, + "Cannot convert ClickHouse type {} to CapnProto type {}", + type->getName(), + getCapnProtoFullTypeName(field.getType())); + if (!additional_error_message.empty()) + e.addMessage(additional_error_message); + throw std::move(e); + } + } +} + +} + +#endif diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h new file mode 100644 index 00000000000..93ca0a5e616 --- /dev/null +++ b/src/Formats/CapnProtoUtils.h @@ -0,0 +1,43 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include + +namespace DB +{ +// Wrapper for classes that could throw in destructor +// https://github.com/capnproto/capnproto/issues/553 +template +struct DestructorCatcher +{ + T impl; + template + DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} + ~DestructorCatcher() noexcept try { } catch (...) { return; } +}; + +class CapnProtoSchemaParser : public DestructorCatcher +{ +public: + CapnProtoSchemaParser() {} + + capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); +}; + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name); + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode); + +} + +#endif diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d3ff5cbf8a7..63cb26ab87c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -112,6 +112,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSchemaInfo.cpp b/src/Formats/FormatSchemaInfo.cpp index 2605c0bdf04..24c8dfc14f2 100644 --- a/src/Formats/FormatSchemaInfo.cpp +++ b/src/Formats/FormatSchemaInfo.cpp @@ -99,4 +99,10 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & } } +FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message) + : FormatSchemaInfo( + settings.schema.format_schema, format, require_message, settings.schema.is_server, settings.schema.format_schema_path) +{ +} + } diff --git a/src/Formats/FormatSchemaInfo.h b/src/Formats/FormatSchemaInfo.h index cb041e02116..8c430218af0 100644 --- 
a/src/Formats/FormatSchemaInfo.h
+++ b/src/Formats/FormatSchemaInfo.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 
 namespace DB
 {
@@ -11,6 +12,7 @@ class FormatSchemaInfo
 {
 public:
     FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path);
+    FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message);
 
     /// Returns path to the schema file.
     const String & schemaPath() const { return schema_path; }
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 8c894c77e82..ce5f1effa8c 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -183,6 +183,20 @@ struct FormatSettings
     {
         bool import_nested = false;
     } orc;
+
+    /// For CapnProto format we should determine how to
+    /// compare ClickHouse Enum and Enum from schema.
+    enum class EnumComparingMode
+    {
+        BY_NAMES, // Names in enums should be the same, values can be different.
+        BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison.
+        BY_VALUES, // Values should be the same, names can be different.
+    };
+
+    struct
+    {
+        EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
+    } capn_proto;
 };
 
 }
diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp
index 3e4c0366e8a..f6b4bb7e2e1 100644
--- a/src/Formats/registerFormats.cpp
+++ b/src/Formats/registerFormats.cpp
@@ -67,6 +67,7 @@ void registerOutputFormatNull(FormatFactory & factory);
 void registerOutputFormatMySQLWire(FormatFactory & factory);
 void registerOutputFormatMarkdown(FormatFactory & factory);
 void registerOutputFormatPostgreSQLWire(FormatFactory & factory);
+void registerOutputFormatCapnProto(FormatFactory & factory);
 
 
 /// Input only formats.
 
@@ -139,6 +140,7 @@ void registerFormats()
     registerOutputFormatMySQLWire(factory);
     registerOutputFormatMarkdown(factory);
     registerOutputFormatPostgreSQLWire(factory);
+    registerOutputFormatCapnProto(factory);
 
     registerInputFormatRegexp(factory);
     registerInputFormatJSONAsString(factory);
diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
index fd4b2870bea..8492fc9b623 100644
--- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
@@ -1,7 +1,6 @@
 #include "CapnProtoRowInputFormat.h"
 #if USE_CAPNP
 
-#include
 #include
 #include
 #include
@@ -9,198 +8,40 @@
 #include
 #include
 #include
-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
 
 namespace DB
 {
 
 namespace ErrorCodes
 {
-    extern const int BAD_TYPE_OF_FIELD;
-    extern const int THERE_IS_NO_COLUMN;
     extern const int LOGICAL_ERROR;
 }
 
-static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i)
-{
-    CapnProtoRowInputFormat::NestedField field = {{}, i};
-
-    // Remove leading dot in field definition, e.g. ".msg" -> "msg"
-    String name(header.safeGetByPosition(i).name);
-    if (!name.empty() && name[0] == '.')
-        name.erase(0, 1);
-
-    splitInto<'.', '_'>(field.tokens, name);
-    return field;
-}
-
-
-static Field convertNodeToField(const capnp::DynamicValue::Reader & value)
-{
-    switch (value.getType())
-    {
-        case capnp::DynamicValue::UNKNOWN:
-            throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD);
-        case capnp::DynamicValue::VOID:
-            return Field();
-        case capnp::DynamicValue::BOOL:
-            return value.as() ? 
1u : 0u; - case capnp::DynamicValue::INT: - return value.as(); - case capnp::DynamicValue::UINT: - return value.as(); - case capnp::DynamicValue::FLOAT: - return value.as(); - case capnp::DynamicValue::TEXT: - { - auto arr = value.as(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::DATA: - { - auto arr = value.as().asChars(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::LIST: - { - auto list_value = value.as(); - Array res(list_value.size()); - for (auto i : kj::indices(list_value)) - res[i] = convertNodeToField(list_value[i]); - - return res; - } - case capnp::DynamicValue::ENUM: - return value.as().getRaw(); - case capnp::DynamicValue::STRUCT: - { - auto struct_value = value.as(); - const auto & fields = struct_value.getSchema().getFields(); - - Tuple tuple(fields.size()); - for (auto i : kj::indices(fields)) - tuple[i] = convertNodeToField(struct_value.get(fields[i])); - - return tuple; - } - case capnp::DynamicValue::CAPABILITY: - throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::ANY_POINTER: - throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - } - return Field(); -} - -static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field) -{ - KJ_IF_MAYBE(child, node.findFieldByName(field)) - return *child; - else - throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN); -} - - -void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader) -{ - /// Columns in a table can map to fields in Cap'n'Proto or to structs. - - /// Store common parents and their tokens in order to backtrack. 
- std::vector parents; - std::vector parent_tokens; - - capnp::StructSchema cur_reader = reader; - - for (const auto & field : sorted_fields) - { - if (field.tokens.empty()) - throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR); - - // Backtrack to common parent - while (field.tokens.size() < parent_tokens.size() + 1 - || !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin())) - { - actions.push_back({Action::POP}); - parents.pop_back(); - parent_tokens.pop_back(); - - if (parents.empty()) - { - cur_reader = reader; - break; - } - else - cur_reader = parents.back().getType().asStruct(); - } - - // Go forward - while (parent_tokens.size() + 1 < field.tokens.size()) - { - const auto & token = field.tokens[parents.size()]; - auto node = getFieldOrThrow(cur_reader, token); - if (node.getType().isStruct()) - { - // Descend to field structure - parents.emplace_back(node); - parent_tokens.emplace_back(token); - cur_reader = node.getType().asStruct(); - actions.push_back({Action::PUSH, node}); - } - else if (node.getType().isList()) - { - break; // Collect list - } - else - throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD); - } - - // Read field from the structure - auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]); - if (node.getType().isList() && !actions.empty() && actions.back().field == node) - { - // The field list here flattens Nested elements into multiple arrays - // In order to map Nested types in Cap'nProto back, they need to be collected - // Since the field names are sorted, the order of field positions must be preserved - // For example, if the fields are { b @0 :Text, a @1 :Text }, the `a` would come first - // even though it's position is second. - auto & columns = actions.back().columns; - auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos); - columns.insert(it, field.pos); - } - else - { - actions.push_back({Action::READ, node, {field.pos}}); - } - } -} - -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info) - : IRowInputFormat(std::move(header), in_, std::move(params_)), parser(std::make_shared()) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) + : IRowInputFormat(std::move(header), in_, std::move(params_)) + , parser(std::make_shared()) + , format_settings(format_settings_) + , column_types(getPort().getHeader().getDataTypes()) + , column_names(getPort().getHeader().getNames()) { // Parse the schema and fetch the root object - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {}); -#pragma GCC diagnostic pop - - root = schema.getNested(info.messageName()).asStruct(); - - /** - * The schema typically consists of fields in various nested structures. - * Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent, - * and the nesting level doesn't decrease to make traversal easier. - */ - const auto & sample = getPort().getHeader(); - NestedFieldList list; - size_t num_columns = sample.columns(); - for (size_t i = 0; i < num_columns; ++i) - list.push_back(split(sample, i)); - - // Order list first by value of strings then by length of string vector. 
- std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; }); - createActions(list, root); + root = parser->getMessageSchema(info); + checkCapnProtoSchemaStructure(root, getPort().getHeader(), format_settings.capn_proto.enum_comparing_mode); } kj::Array CapnProtoRowInputFormat::readMessage() @@ -233,6 +74,186 @@ kj::Array CapnProtoRowInputFormat::readMessage() return msg; } +static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Int8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int64: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime64: + assert_cast &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); + } +} + +static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::UInt8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::UInt64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer."); + } +} + +static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Float32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Float64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a float."); + } +} + +template +static void insertString(IColumn & column, Value value) +{ + column.insertData(reinterpret_cast(value.begin()), value.size()); +} + +template +static void insertEnum(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicEnum & enum_value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); + auto enum_type = assert_cast *>(column_type.get()); + DataTypePtr nested_type = std::make_shared>(); + if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_VALUES) + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + else if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_NAMES) + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + else + { + /// Find the same enum name case insensitive. 
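+            /// A sketch of the name comparison assumed here -- compareEnumNames is declared in
+            /// CapnProtoUtils.h above; the body below is illustrative and may differ in detail:
+            ///
+            ///     bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode)
+            ///     {
+            ///         if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE)
+            ///             return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second);
+            ///         return first == second;
+            ///     }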
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) + { + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } + } + } +} + +static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + if (column_type->lowCardinality()) + { + auto & lc_column = assert_cast(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + auto dict_type = assert_cast(column_type.get())->getDictionaryType(); + insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + lc_column.insertFromFullColumn(*tmp_column, 0); + return; + } + + switch (value.getType()) + { + case capnp::DynamicValue::Type::INT: + insertSignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::UINT: + insertUnsignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::FLOAT: + insertFloat(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::BOOL: + insertUnsignedInteger(column, column_type, UInt64(value.as())); + break; + case capnp::DynamicValue::Type::DATA: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::TEXT: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::ENUM: + if (column_type->getTypeId() == TypeIndex::Enum8) + insertEnum(column, column_type, value.as(), enum_comparing_mode); + else + insertEnum(column, column_type, value.as(), enum_comparing_mode); + break; + case capnp::DynamicValue::LIST: + { + auto list_value = value.as(); + auto & column_array = assert_cast(column); + auto & offsets = column_array.getOffsets(); + offsets.push_back(offsets.back() + list_value.size()); + + auto & nested_column = column_array.getData(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + for (const auto & nested_value : list_value) + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + break; + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_value = value.as(); + if (column_type->isNullable()) + { + auto & nullable_column = assert_cast(column); + auto field = *kj::_::readMaybe(struct_value.which()); + if (field.getType().isVoid()) + nullable_column.insertDefault(); + else + { + auto & nested_column = nullable_column.getNestedColumn(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + auto nested_value = struct_value.get(field); + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + nullable_column.getNullMapData().push_back(0); + } + } + else + { + auto & tuple_column = assert_cast(column); + const auto * tuple_type = assert_cast(column_type.get()); + for (size_t i = 0; i != tuple_column.tupleSize(); ++i) + insertValue( + tuple_column.getColumn(i), + tuple_type->getElements()[i], + struct_value.get(tuple_type->getElementNames()[i]), + enum_comparing_mode); + } + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto value type."); + } +} + bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { if (in->eof()) @@ -245,51 +266,12 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension #else capnp::FlatArrayMessageReader msg(array); #endif - std::vector stack; - stack.push_back(msg.getRoot(root)); - for 
(auto action : actions) + auto root_reader = msg.getRoot(root); + for (size_t i = 0; i != columns.size(); ++i) { - switch (action.type) - { - case Action::READ: - { - Field value = convertNodeToField(stack.back().get(action.field)); - if (action.columns.size() > 1) - { - // Nested columns must be flattened into several arrays - // e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...) - const auto & collected = DB::get(value); - size_t size = collected.size(); - // The flattened array contains an array of a part of the nested tuple - Array flattened(size); - for (size_t column_index = 0; column_index < action.columns.size(); ++column_index) - { - // Populate array with a single tuple elements - for (size_t off = 0; off < size; ++off) - { - const auto & tuple = DB::get(collected[off]); - flattened[off] = tuple[column_index]; - } - auto & col = columns[action.columns[column_index]]; - col->insert(flattened); - } - } - else - { - auto & col = columns[action.columns[0]]; - col->insert(value); - } - - break; - } - case Action::POP: - stack.pop_back(); - break; - case Action::PUSH: - stack.push_back(stack.back().get(action.field).as()); - break; - } + auto value = getReaderByColumnName(root_reader, column_names[i]); + insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); } return true; @@ -302,8 +284,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true, - settings.schema.is_server, settings.schema.format_schema_path)); + FormatSchemaInfo(settings, "CapnProto", true), settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 0957cd1d681..fc30cf11237 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -4,8 +4,8 @@ #if USE_CAPNP #include +#include #include -#include namespace DB { @@ -22,18 +22,7 @@ class ReadBuffer; class CapnProtoRowInputFormat : public IRowInputFormat { public: - struct NestedField - { - std::vector tokens; - size_t pos; - }; - using NestedFieldList = std::vector; - - /** schema_dir - base path for schema files - * schema_file - location of the capnproto schema, e.g. "schema.capnp" - * root_object - name to the root object, e.g. "Message" - */ - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); String getName() const override { return "CapnProtoRowInputFormat"; } @@ -42,34 +31,11 @@ public: private: kj::Array readMessage(); - // Build a traversal plan from a sorted list of fields - void createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader); - - /* Action for state machine for traversing nested structures. */ - using BlockPositionList = std::vector; - struct Action - { - enum Type { POP, PUSH, READ }; - Type type{}; - capnp::StructSchema::Field field{}; - BlockPositionList columns{}; - }; - - // Wrapper for classes that could throw in destructor - // https://github.com/capnproto/capnproto/issues/553 - template - struct DestructorCatcher - { - T impl; - template - DestructorCatcher(Arg && ... 
args) : impl(kj::fwd(args)...) {} - ~DestructorCatcher() noexcept try { } catch (...) { return; } - }; - using SchemaParser = DestructorCatcher; - - std::shared_ptr parser; + std::shared_ptr parser; capnp::StructSchema root; - std::vector actions; + const FormatSettings format_settings; + DataTypes column_types; + Names column_names; }; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp new file mode 100644 index 00000000000..b299e1fc00a --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -0,0 +1,251 @@ +#include +#if USE_CAPNP + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +CapnProtoOutputStream::CapnProtoOutputStream(WriteBuffer & out_) : out(out_) +{ +} + +void CapnProtoOutputStream::write(const void * buffer, size_t size) +{ + out.write(reinterpret_cast(buffer), size); +} + +CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique(out_)), format_settings(format_settings_) +{ + schema = schema_parser.getMessageSchema(info); + checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode); +} + +template +static capnp::DynamicEnum getDynamicEnum( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + const capnp::EnumSchema & enum_schema, + FormatSettings::EnumComparingMode mode) +{ + const auto * enum_data_type = assert_cast *>(data_type.get()); + EnumValue enum_value = column->getInt(row_num); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + return capnp::DynamicEnum(enum_schema, enum_value); + + auto enum_name = enum_data_type->getNameForValue(enum_value); + for (const auto enumerant : enum_schema.getEnumerants()) + { + if (compareEnumNames(String(enum_name), enumerant.getProto().getName(), mode)) + return capnp::DynamicEnum(enumerant); + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert ClickHouse Enum value to CapnProto Enum"); +} + +static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & column, size_t row_num, capnp::DynamicStruct::Builder & struct_builder, capnp::StructSchema::Field field) +{ + if (const auto * array_column = checkAndGetColumn(*column)) + { + size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; + return struct_builder.init(field, size); + } + + if (field.getType().isStruct()) + return struct_builder.init(field); + + return struct_builder.get(field); +} + +static std::optional convertToDynamicValue(const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + /// Here we don't do any type validation, because it was already done in the CapnProtoRowOutputFormat constructor.
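+    /// Example (illustrative): for a LowCardinality(String) column the branch below first
+    /// translates row_num into an index inside the dictionary, then recurses with the
+    /// dictionary's nested String column, so the cases further down only ever see
+    /// plain, fully materialized columns.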
+ + if (data_type->lowCardinality()) + { + const auto * lc_column = assert_cast(column.get()); + const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); + size_t index = lc_column->getIndexAt(row_num); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode); + } + + switch (builder.getType()) + { + case capnp::DynamicValue::Type::INT: + /// We allow output DateTime64 as Int64. + if (WhichDataType(data_type).isDateTime64()) + return capnp::DynamicValue::Reader(assert_cast *>(column.get())->getElement(row_num)); + return capnp::DynamicValue::Reader(column->getInt(row_num)); + case capnp::DynamicValue::Type::UINT: + return capnp::DynamicValue::Reader(column->getUInt(row_num)); + case capnp::DynamicValue::Type::BOOL: + return capnp::DynamicValue::Reader(column->getBool(row_num)); + case capnp::DynamicValue::Type::FLOAT: + return capnp::DynamicValue::Reader(column->getFloat64(row_num)); + case capnp::DynamicValue::Type::ENUM: + { + auto enum_schema = builder.as().getSchema(); + if (data_type->getTypeId() == TypeIndex::Enum8) + return capnp::DynamicValue::Reader( + getDynamicEnum(column, data_type, row_num, enum_schema, enum_comparing_mode)); + return capnp::DynamicValue::Reader( + getDynamicEnum(column, data_type, row_num, enum_schema, enum_comparing_mode)); + } + case capnp::DynamicValue::Type::DATA: + { + auto data = column->getDataAt(row_num); + return capnp::DynamicValue::Reader(capnp::Data::Reader(reinterpret_cast(data.data), data.size)); + } + case capnp::DynamicValue::Type::TEXT: + { + auto data = String(column->getDataAt(row_num)); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data(), data.size())); + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_builder = builder.as(); + auto nested_struct_schema = struct_builder.getSchema(); + /// A struct can represent either a Tuple or a Nullable (a named union with two fields). + if (data_type->isNullable()) + { + const auto * nullable_type = assert_cast(data_type.get()); + const auto * nullable_column = assert_cast(column.get()); + auto fields = nested_struct_schema.getUnionFields(); + if (nullable_column->isNullAt(row_num)) + { + auto null_field = fields[0].getType().isVoid() ? fields[0] : fields[1]; + struct_builder.set(null_field, capnp::Void()); + } + else + { + auto value_field = fields[0].getType().isVoid() ?
fields[1] : fields[0]; + struct_builder.clear(value_field); + const auto & nested_column = nullable_column->getNestedColumnPtr(); + auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode); + if (value) + struct_builder.set(value_field, std::move(*value)); + } + } + else + { + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + const auto & nested_columns = assert_cast(column.get())->getColumns(); + for (const auto & name : tuple_data_type->getElementNames()) + { + auto pos = tuple_data_type->getPositionByName(name); + auto field_builder + = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode); + if (value) + struct_builder.set(name, std::move(*value)); + } + } + return std::nullopt; + } + case capnp::DynamicValue::Type::LIST: + { + auto list_builder = builder.as(); + const auto * array_column = assert_cast(column.get()); + const auto & nested_column = array_column->getDataPtr(); + const auto & nested_type = assert_cast(data_type.get())->getNestedType(); + const auto & offsets = array_column->getOffsets(); + auto offset = offsets[row_num - 1]; + size_t size = offsets[row_num] - offset; + + const auto * nested_array_column = checkAndGetColumn(*nested_column); + for (size_t i = 0; i != size; ++i) + { + capnp::DynamicValue::Builder value_builder; + /// For nested arrays we need to initialize nested list builder. + if (nested_array_column) + { + const auto & nested_offset = nested_array_column->getOffsets(); + size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1]; + value_builder = list_builder.init(i, nested_array_size); + } + else + value_builder = list_builder[i]; + + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode); + if (value) + list_builder.set(i, std::move(*value)); + } + return std::nullopt; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto type."); + } +} + +void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + capnp::MallocMessageBuilder message; + capnp::DynamicStruct::Builder root = message.initRoot(schema); + for (size_t i = 0; i != columns.size(); ++i) + { + auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); + auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode); + if (value) + struct_builder.set(field, std::move(*value)); + } + + capnp::writeMessage(*output_stream, message); +} + +void registerOutputFormatProcessorsCapnProto(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("CapnProto", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + }); +} + +} + +#else + +namespace DB +{ +class FormatFactory; +void registerOutputFormatProcessorsCapnProto(FormatFactory &) {} +} + +#endif // USE_CAPNP 
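For readers following along, CapnProtoOutputStream above is the entire bridge between ClickHouse buffers and capnp serialization. Below is a minimal, self-contained sketch of the same adapter pattern outside ClickHouse -- the class and variable names are illustrative (not part of the patch), a std::string stands in for WriteBuffer, and the write() signature matches the kj version vendored by this patch:

    #include <capnp/message.h>
    #include <capnp/serialize.h>
    #include <kj/io.h>
    #include <cstddef>
    #include <string>

    /// Any byte sink can implement kj::OutputStream; CapnProtoOutputStream does the
    /// same thing with a ClickHouse WriteBuffer instead of a std::string.
    class StringOutputStream : public kj::OutputStream
    {
    public:
        explicit StringOutputStream(std::string & out_) : out(out_) {}

        void write(const void * buffer, size_t size) override
        {
            out.append(reinterpret_cast<const char *>(buffer), size);
        }

    private:
        std::string & out;
    };

    int main()
    {
        capnp::MallocMessageBuilder message;
        message.getRoot<capnp::AnyPointer>(); /// force allocation of an (empty) root

        std::string data;
        StringOutputStream stream(data);
        /// One writeMessage call per message, exactly as CapnProtoRowOutputFormat::write
        /// emits one message per row.
        capnp::writeMessage(stream, message);
        return data.empty() ? 1 : 0;
    }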
diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h new file mode 100644 index 00000000000..0f321071d62 --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +class CapnProtoOutputStream : public kj::OutputStream +{ +public: + CapnProtoOutputStream(WriteBuffer & out_); + + void write(const void * buffer, size_t size) override; + +private: + WriteBuffer & out; +}; + +class CapnProtoRowOutputFormat : public IRowOutputFormat +{ +public: + CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_); + + String getName() const override { return "CapnProtoRowOutputFormat"; } + + void write(const Columns & columns, size_t row_num) override; + + void writeField(const IColumn &, const ISerialization &, size_t) override { } + +private: + Names column_names; + DataTypes column_types; + capnp::StructSchema schema; + std::unique_ptr output_stream; + const FormatSettings format_settings; + CapnProtoSchemaParser schema_parser; +}; + +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index a5e6b7ec480..df7b7102739 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -67,8 +67,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, - settings.schema.is_server, settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 12c5e98797a..29cd9be79bc 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -64,9 +64,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) { return std::make_shared( buf, header, params, - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", - true, settings.schema.is_server, - settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), settings, with_length_delimiter); }); diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference new file mode 100644 index 00000000000..2b2307bfc6a --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -0,0 +1,52 @@ +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +1 (2,(3,4)) (((5))) +1 (2,(3,4)) (((5))) +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 ((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +1 
((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +one +two +tHrEe +oNe +tWo +threE +first +second +third +OK +OK +OK +OK +one two ['one',NULL,'two',NULL] +two \N [NULL] +one two ['one',NULL,'two',NULL] +two \N [NULL] +0 1 2 +1 2 3 +2 3 4 +3 4 5 +4 5 6 +(0,(1,(2))) +(1,(2,(3))) +(2,(3,(4))) +(3,(4,(5))) +(4,(5,(6))) +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh new file mode 100755 index 00000000000..99807cc1738 --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp +touch $CAPN_PROTO_FILE + +SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02030 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_simple_types"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_simple_types (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixed FixedString(5), data String, date Date, datetime DateTime, datetime64 DateTime64(3)) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types values (-1, 1, -1000, 1000, -10000000, 1000000, -1000000000, 1000000000, 123.123, 123123123.123123123, 'Some string', 'fixed', 'Some data', '2000-01-06', '2000-06-01 19:42:42', '2000-04-01 11:21:33.123')" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_tuples (value UInt64, tuple1 Tuple(one UInt64, two Tuple(three UInt64, four UInt64)), tuple2 Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples VALUES (1, (2, (3, 4)), (((5))))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP 
TABLE IF EXISTS capnp_lists" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_lists (value UInt64, list1 Array(UInt64), list2 Array(Array(Array(UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists VALUES (1, [1, 2, 3], [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], []], []])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_lists_and_tuples (value UInt64, nested Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples VALUES (1, ((2, [[3, 4], [5, 6], []]), [([[(7, 8), (9, 10)], [(11, 12), (13, 14)], []], [([15, 16, 17]), ([])])]))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_table (nested Nested(value UInt64, array Array(UInt64), tuple Tuple(one UInt64, two UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table VALUES ([1, 2, 3], [[4, 5, 6], [], [7, 8]], [(9, 10), (11, 12), (13, 14)])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nullable" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nullable (nullable Nullable(UInt64), array Array(Nullable(UInt64)), tuple Tuple(nullable Nullable(UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable VALUES (1, [1, Null, 2], (1)), (Null, [Null, Null, 42], (Null))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nullable" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(number, 'Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2)') AS value FROM numbers(3) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_enum:Message'" > $CAPN_PROTO_FILE + +$CLICKHOUSE_CLIENT 
--query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'oNe\' = 1, \'tWo\' = 2, \'threE\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 0, \'second\' = 1, \'third\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'three\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2, \'four\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 1, \'two\' = 2, \'three\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_low_cardinality (lc1 LowCardinality(String), lc2 LowCardinality(Nullable(String)), lc3 Array(LowCardinality(Nullable(String)))) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality VALUES ('one', 'two', ['one', Null, 'two', Null]), ('two', Null, [Null])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_low_cardinality" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(tuple(number, tuple(number + 1, tuple(number + 2))), 'Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') AS a FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a_b UInt64, a_c_d UInt64, a_c_e_f UInt64') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a_b, number + 1 AS a_c_d, number + 2 AS a_c_e_f FROM numbers(5) FORMAT CapnProto SETTINGS 
format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +rm $CAPN_PROTO_FILE +rm -rf $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp new file mode 100644 index 00000000000..f033b177a45 --- /dev/null +++ 
b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp @@ -0,0 +1,13 @@ +@0x9ef128e10a8010b2; + +struct Message +{ + value @0 : EnumType; + + enum EnumType + { + one @0; + two @1; + tHrEe @2; + } +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp new file mode 100644 index 00000000000..a027692e4bc --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp @@ -0,0 +1,23 @@ +@0xd8dd7b35452d1c4c; + +struct FakeNullable1 +{ + union + { + value @0 : Text; + null @1 : Void; + trash @2 : Text; + } +} + +struct FakeNullable2 +{ + value @0 : Text; + null @1 : Void; +} + +struct Message +{ + nullable1 @0 : FakeNullable1; + nullable2 @1 : FakeNullable2; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp new file mode 100644 index 00000000000..78fe3cf551e --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp @@ -0,0 +1,8 @@ +@0x9ef128e10a8010b7; + +struct Message +{ + value @0 : UInt64; + list1 @1 : List(UInt64); + list2 @2 : List(List(List(UInt64))); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp new file mode 100644 index 00000000000..0958889f0d8 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp @@ -0,0 +1,17 @@ +@0x9ef128e10a8010b7; + +struct NullableText +{ + union + { + value @0 : Text; + null @1 : Void; + } +} + +struct Message +{ + lc1 @0 : Text; + lc2 @1 : NullableText; + lc3 @2 : List(NullableText); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp new file mode 100644 index 00000000000..11fa99f62f5 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp @@ -0,0 +1,36 @@ +@0x9ef128e10a8010b2; + +struct Nested1 +{ + b @0 : UInt64; + c @1 : List(List(UInt64)); +} + +struct Nested2 +{ + e @0 : List(List(Nested3)); + h @1 : List(Nested4); +} + +struct Nested3 +{ + f @0 : UInt64; + g @1 : UInt64; +} + +struct Nested4 +{ + k @0 : List(UInt64); +} + +struct Nested +{ + a @0 : Nested1; + d @1 : List(Nested2); +} + +struct Message +{ + value @0 : UInt64; + nested @1 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp new file mode 100644 index 00000000000..42f17246d58 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp @@ -0,0 +1,20 @@ +@0x9ef128e10a8010b3; + + +struct Nested1 +{ + one @0 : UInt64; + two @1 : UInt64; +} + +struct Nested +{ + value @0 : List(UInt64); + array @1 : List(List(UInt64)); + tuple @2 : List(Nested1); +} + +struct Message +{ + nested @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp new file mode 100644 index 00000000000..161c1bbaea6 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp @@ -0,0 +1,23 @@ +@0x9ef128e12a8010b2; + +struct Nested1 +{ + d @0 : UInt64; + e @1 : Nested2; +} + +struct Nested2 +{ + f @0 : UInt64; +} + +struct 
Nested +{ + b @0 : UInt64; + c @1 : Nested1; +} + +struct Message +{ + a @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp new file mode 100644 index 00000000000..41254911710 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp @@ -0,0 +1,22 @@ +@0x9ef128e10a8010b2; + +struct NullableUInt64 +{ + union + { + value @0 : UInt64; + null @1 : Void; + } +} + +struct Tuple +{ + nullable @0 : NullableUInt64; +} + +struct Message +{ + nullable @0 : NullableUInt64; + array @1 : List(NullableUInt64); + tuple @2 : Tuple; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp new file mode 100644 index 00000000000..a85bbbc511b --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp @@ -0,0 +1,21 @@ +@0xd9dd7b35452d1c4f; + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + float32 @8 : Float32; + float64 @9 : Float64; + string @10 : Text; + fixed @11 : Text; + data @12 : Data; + date @13 : UInt16; + datetime @14 : UInt32; + datetime64 @15 : Int64; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp new file mode 100644 index 00000000000..21c3f0eb2e1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp @@ -0,0 +1,35 @@ +@0x9ef128e10a8010b8; + +struct Nested5 +{ + x @0 : UInt64; +} + +struct Nested4 +{ + nested2 @0 : Nested5; +} + +struct Nested3 +{ + nested1 @0 : Nested4; +} + +struct Nested2 +{ + three @0 : UInt64; + four @1 : UInt64; +} + +struct Nested1 +{ + one @0 : UInt64; + two @1 : Nested2; +} + +struct Message +{ + value @0 : UInt64; + tuple1 @1 : Nested1; + tuple2 @2 : Nested3; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp new file mode 100644 index 00000000000..9fb5e37bfea --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp @@ -0,0 +1,10 @@ +@0xd8dd7b35452d1c4f; + +struct Message +{ + union + { + a @0 : UInt64; + b @1 : Text; + } +} From 1cd938fbba61053e5a2d77b53afa14d7a35436ce Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 28 Sep 2021 16:07:00 +0300 Subject: [PATCH 157/238] Fix typo --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index ce5f1effa8c..403ccbc6763 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -184,7 +184,7 @@ struct FormatSettings bool import_nested = false; } orc; - /// For apnProto format we should determine how to + /// For capnProto format we should determine how to /// compare ClickHouse Enum and Enum from schema. 
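/// Worked example (values are illustrative): given a ClickHouse Enum8('one' = 5, 'TWO' = 7)
/// and a CapnProto enum { one @0; two @1; }: BY_NAMES rejects the cast ('TWO' != 'two'),
/// BY_NAMES_CASE_INSENSITIVE accepts it ('TWO' matches 'two'), and BY_VALUES rejects it
/// because 5 and 7 are not among the CapnProto ordinals 0 and 1.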
enum class EnumComparingMode { From c97f375728eb372ddc50a927372685bce7e5226a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 28 Sep 2021 17:51:10 +0300 Subject: [PATCH 158/238] Fix style --- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 99807cc1738..03b43c007d8 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -12,7 +12,7 @@ touch $CAPN_PROTO_FILE SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 -mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +mkdir -p ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ From ed8818a773a82dc47ca4bb88e565267c8c954dcb Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Sep 2021 20:03:03 +0300 Subject: [PATCH 159/238] Fix style, better check in enum comparison --- src/Formats/CapnProtoUtils.cpp | 12 +++++++----- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 9931785f43e..974688e7560 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -153,16 +153,18 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ std::unordered_set capn_enum_values; auto enumerants = enum_schema.getEnumerants(); + /// In CapnProto, Enum fields are numbered sequentially starting from zero. + if (mode == FormatSettings::EnumComparingMode::BY_VALUES && enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that are out of range for the ClickHouse Enum"; + return false; + } + for (auto enumerant : enumerants) { String name = enumerant.getProto().getName(); capn_enum_names.insert(to_lower ?
boost::algorithm::to_lower_copy(name) : name); auto value = enumerant.getOrdinal(); - if (mode == FormatSettings::EnumComparingMode::BY_VALUES && value > max_value) - { - error_message += "Enum from CapnProto schema contains value that is out of range for Clickhouse Enum"; - return false; - } capn_enum_values.insert(Type(value)); } diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 03b43c007d8..1a0efe4ed07 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -106,4 +106,4 @@ $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FRO $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; rm $CAPN_PROTO_FILE -rm -rf $SCHEMADIR/$SERVER_SCHEMADIR +rm -rf {$SCHEMADIR:?}/{$SERVER_SCHEMADIR:?} From 17ed293470d65738a0404ea53cff6cbda58b5a61 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 29 Sep 2021 14:21:20 +0300 Subject: [PATCH 160/238] Fix test --- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 1a0efe4ed07..e6592142560 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -12,7 +12,7 @@ touch $CAPN_PROTO_FILE SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 -mkdir -p ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ @@ -106,4 +106,4 @@ $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FRO $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; rm $CAPN_PROTO_FILE -rm -rf {$SCHEMADIR:?}/{$SERVER_SCHEMADIR:?} +rm -rf ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} From f88a2ad653f4a5ed2f0dc5a9d008020e91b0a09a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 29 Sep 2021 15:08:53 +0300 Subject: [PATCH 161/238] Handle exception when cannot extract value from struct, add test for it --- src/Formats/CapnProtoUtils.cpp | 11 ++++++++++- .../queries/0_stateless/02030_capnp_format.reference | 1 + tests/queries/0_stateless/02030_capnp_format.sh | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 974688e7560..9176579f672 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -331,7 +331,16 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re auto [field_name, nested_name] = splitFieldName(name); KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) { - auto field_reader = struct_reader.get(*field); + capnp::DynamicValue::Reader field_reader; + try + { + field_reader = 
struct_reader.get(*field); + } + catch (const kj::Exception & e) + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot extract field value from struct by provided schema, error: {} Perhaps the data was generated by another schema", String(e.getDescription().cStr())); + } + if (nested_name.empty()) return field_reader; diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference index 2b2307bfc6a..8c3c81b5bc3 100644 --- a/tests/queries/0_stateless/02030_capnp_format.reference +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -50,3 +50,4 @@ OK OK OK OK +OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index e6592142560..c24b85109da 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -92,6 +92,10 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tup $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'string String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL'; + + $CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; From 9ddcdbba39bda24408874207762f8ffb669058df Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 30 Sep 2021 23:19:21 +0300 Subject: [PATCH 162/238] Add INCORRECT_DATA error code --- src/Formats/CapnProtoUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 9176579f672..2cc20abedd0 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int CAPN_PROTO_BAD_CAST; extern const int FILE_DOESNT_EXIST; extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_DATA; } capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) From 5d16dc7f9aa82b9952578e6672cc9ab84bd5f0d4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Oct 2021 16:02:32 +0300 Subject: [PATCH 163/238] Try to fix tests, update capnp lib to eliminate problem with UB sanitizer --- contrib/capnproto | 2 +- contrib/capnproto-cmake/CMakeLists.txt | 1 + 
src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp | 6 +++--- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/contrib/capnproto b/contrib/capnproto index a00ccd91b37..c8189ec3c27 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit a00ccd91b3746ef2ab51d40fe3265829949d1ace +Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593 diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 9f6e076cc7d..274be8c5eeb 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -45,6 +45,7 @@ set (CAPNP_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-packed.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/stream.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-loader.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/dynamic.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++" diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index b299e1fc00a..d256fe8f160 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -129,8 +129,8 @@ static std::optional convertToDynamicValue(const Co } case capnp::DynamicValue::Type::TEXT: { - auto data = String(column->getDataAt(row_num)); - return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data(), data.size())); + auto data = column->getDataAt(row_num); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data, data.size)); } case capnp::DynamicValue::Type::STRUCT: { @@ -220,7 +220,7 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode); if (value) - struct_builder.set(field, std::move(*value)); + struct_builder.set(field, *value); } capnp::writeMessage(*output_stream, message); diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index c24b85109da..23e626d6d96 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=/$($CLICKHOUSE_CLIENT --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR From dd4421d4b1131c246f762646abbd4534aa7a8489 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Oct 2021 14:12:54 +0300 Subject: [PATCH 164/238] Fix build --- contrib/capnproto-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 274be8c5eeb..05446355535 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -64,6 +64,7 @@ set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/lexer.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/grammar.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/parser.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/generics.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/node-translator.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/compiler.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-parser.c++" From 95790b8a1c25d293b227d2e968a16d5a4d918e68 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Oct 2021 13:51:00 +0300 Subject: [PATCH 165/238] Update CapnProtoUtils.cpp --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 2cc20abedd0..59f63243e28 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -43,7 +43,7 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. 
String description = String(e.getDescription().cStr()); - if (description.starts_with("no such directory")) + if (description.starts_with("No such file or directory")) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); if (description.starts_with("Parse error")) From 9ec6930c152af476cbaba2994419c73509b93d9a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Oct 2021 21:12:49 +0300 Subject: [PATCH 166/238] Better exception handling --- src/Formats/CapnProtoUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 59f63243e28..1f0e6cf2cac 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -43,10 +43,10 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. String description = String(e.getDescription().cStr()); - if (description.starts_with("No such file or directory")) + if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); - if (description.starts_with("Parse error")) + if (description.find("Parse error") != String::npos) throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProro schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath()); From 9b909f3f30f93b44eaf65ee8433733f75abfd99c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 7 Oct 2021 10:58:37 +0300 Subject: [PATCH 167/238] Try to fix test --- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 23e626d6d96..02c4fc96c82 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=/$($CLICKHOUSE_CLIENT --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR From f754881e1fd8a42764bfef0b74973abba415808e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 7 Oct 2021 15:28:01 +0300 Subject: [PATCH 168/238] Fix output String data into Text CapnProto type --- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index d256fe8f160..2e32c962177 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -88,7 +88,13 @@ static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & col return struct_builder.get(field); } -static std::optional convertToDynamicValue(const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode) +static std::optional convertToDynamicValue( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + capnp::DynamicValue::Builder builder, + FormatSettings::EnumComparingMode enum_comparing_mode, + std::vector> & temporary_text_data_storage) { /// Here we don't do any types validation, because we did it in CapnProtoRowOutputFormat constructor. @@ -97,7 +103,7 @@ static std::optional convertToDynamicValue(const Co const auto * lc_column = assert_cast(column.get()); const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); size_t index = lc_column->getIndexAt(row_num); - return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); } switch (builder.getType()) @@ -129,8 +135,16 @@ static std::optional convertToDynamicValue(const Co } case capnp::DynamicValue::Type::TEXT: { - auto data = column->getDataAt(row_num); - return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data, data.size)); + /// In TEXT type data should be null-terminated, but ClickHouse String data could not be. + /// To make data null-terminated we should copy it to temporary String object, but + /// capnp::Text::Reader works only with pointer to the data and it's size, so we should + /// guarantee that new String object life time is longer than capnp::Text::Reader life time. 
+ /// To do this we store new String object in a temporary storage, passed in this function + /// by reference. We use unique_ptr instead of just String to avoid pointers + /// invalidation on vector reallocation. + temporary_text_data_storage.push_back(std::make_unique(column->getDataAt(row_num))); + auto & data = temporary_text_data_storage.back(); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data->data(), data->size())); } case capnp::DynamicValue::Type::STRUCT: { @@ -153,7 +167,7 @@ static std::optional convertToDynamicValue(const Co struct_builder.clear(value_field); const auto & nested_column = nullable_column->getNestedColumnPtr(); auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); - auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(value_field, std::move(*value)); } @@ -168,7 +182,7 @@ static std::optional convertToDynamicValue(const Co auto pos = tuple_data_type->getPositionByName(name); auto field_builder = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); - auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(name, std::move(*value)); } @@ -199,7 +213,7 @@ static std::optional convertToDynamicValue(const Co else value_builder = list_builder[i]; - auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) list_builder.set(i, std::move(*value)); } @@ -213,12 +227,15 @@ static std::optional convertToDynamicValue(const Co void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) { capnp::MallocMessageBuilder message; + /// Temporary storage for data that will be outputted in fields with CapnProto type TEXT. + /// See comment in convertToDynamicValue() for more details. 
+ std::vector> temporary_text_data_storage; capnp::DynamicStruct::Builder root = message.initRoot(schema); for (size_t i = 0; i != columns.size(); ++i) { auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); - auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode); + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(field, *value); } From 5daed60eaec542047682e279f49ed0c65b8116a2 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 8 Oct 2021 13:23:27 +0300 Subject: [PATCH 169/238] Skip test in case of replicated database --- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 02c4fc96c82..aa2fe6c1b35 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 8d1c51c422ed16ee8a5548f72aba360a73230ffa Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Oct 2021 18:18:04 +0300 Subject: [PATCH 170/238] Update Client.cpp --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 45314a5d460..a5e4bd45c7f 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -89,7 +89,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int TOO_DEEP_RECURSION; extern const int NETWORK_ERROR; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int AUTHENTICATION_FAILED; } From 4800749d32e42912c8c34ab664403d9fea5fa75e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 14 Oct 2021 23:56:28 +0800 Subject: [PATCH 171/238] make Ctrl-J to commit --- base/base/ReplxxLineReader.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9bf6ec41255..38867faf5d5 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -177,6 +177,10 @@ ReplxxLineReader::ReplxxLineReader( /// bind C-p/C-n to history-previous/history-next like readline. rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + + /// bind C-j to ENTER action. + rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }); + /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but /// it also binded to M-p/M-n). 
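An aside on the temporary-storage trick in convertToDynamicValue() a few hunks up: the copy of the column data into a String gives capnp the null-terminated buffer that capnp::Text::Reader expects, and parking that String behind a unique_ptr in a side vector keeps the buffer alive, at a stable address, until the message is written. The same hazard exists for any non-owning view type. The sketch below reproduces the pattern with std::string_view standing in for capnp::Text::Reader; makeStableView is a made-up helper for illustration, not part of the patch.

#include <memory>
#include <string>
#include <string_view>
#include <vector>

/// Like capnp::Text::Reader, std::string_view does not own its bytes, so the
/// string behind it must outlive the view. Storing each owned copy as a
/// unique_ptr<std::string> keeps the character buffer at a stable address:
/// when the vector reallocates, only the pointers move, never the strings.
std::string_view makeStableView(std::string value, std::vector<std::unique_ptr<std::string>> & storage)
{
    storage.push_back(std::make_unique<std::string>(std::move(value)));
    return std::string_view(storage.back()->data(), storage.back()->size());
}
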
From 89c1a04ef4eb2819631266f6051a1dfe0c818ecb Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:35:56 +0300 Subject: [PATCH 172/238] Fix comments --- src/Formats/CapnProtoUtils.cpp | 118 ++++++++++-------- src/Formats/registerFormats.cpp | 2 +- .../Formats/Impl/CapnProtoRowInputFormat.cpp | 27 ++-- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 4 +- 4 files changed, 85 insertions(+), 66 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 1f0e6cf2cac..4b9993d5a74 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -42,7 +42,7 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn { /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. - String description = String(e.getDescription().cStr()); + auto description = std::string_view(e.getDescription().cStr()); if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); @@ -82,7 +82,6 @@ static const std::map capnp_simple_type_name {capnp::schema::Type::Which::FLOAT64, "Float64"}, {capnp::schema::Type::Which::TEXT, "Text"}, {capnp::schema::Type::Which::DATA, "Data"}, - {capnp::schema::Type::Which::ENUM, "Enum"}, {capnp::schema::Type::Which::INTERFACE, "Interface"}, {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"}, }; @@ -100,40 +99,56 @@ static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema) /// Get full name of type for better exception messages. static String getCapnProtoFullTypeName(const capnp::Type & type) { - if (type.isStruct()) + switch (type.which()) { - auto struct_schema = type.asStruct(); + case capnp::schema::Type::Which::STRUCT: + { + auto struct_schema = type.asStruct(); - auto non_union_fields = struct_schema.getNonUnionFields(); - std::vector non_union_field_names; - for (auto nested_field : non_union_fields) - non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + auto non_union_fields = struct_schema.getNonUnionFields(); + std::vector non_union_field_names; + for (auto nested_field : non_union_fields) + non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); - auto union_fields = struct_schema.getUnionFields(); - std::vector union_field_names; - for (auto nested_field : union_fields) - union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + auto union_fields = struct_schema.getUnionFields(); + std::vector union_field_names; + for (auto nested_field : union_fields) + union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); - String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; - /// Check if the struct is a named union. - if (non_union_field_names.empty()) - return union_name; + String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; + /// Check if the struct is a named union. 
+ if (non_union_field_names.empty()) + return union_name; - String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); - /// Check if the struct contains unnamed union. - if (!union_field_names.empty()) - type_name += "," + union_name; - type_name += ")"; - return type_name; + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains unnamed union. + if (!union_field_names.empty()) + type_name += "," + union_name; + type_name += ")"; + return type_name; + } + case capnp::schema::Type::Which::LIST: + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + case capnp::schema::Type::Which::ENUM: + { + auto enum_schema = type.asEnum(); + String enum_name = "Enum("; + auto enumerants = enum_schema.getEnumerants(); + for (size_t i = 0; i != enumerants.size(); ++i) + { + enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal()); + if (i + 1 != enumerants.size()) + enum_name += ", "; + } + enum_name += ")"; + return enum_name; + } + default: + auto it = capnp_simple_type_names.find(type.which()); + if (it == capnp_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + return it->second; } - - if (type.isList()) - return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; - - if (!capnp_simple_type_names.contains(type.which())) - throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); - - return capnp_simple_type_names.at(type.which()); } template @@ -147,39 +162,38 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ const auto * enum_type = assert_cast *>(column_type.get()); const auto & enum_values = dynamic_cast &>(*enum_type); - auto names = enum_values.getSetOfAllNames(to_lower); - auto values = enum_values.getSetOfAllValues(); - - std::unordered_set capn_enum_names; - std::unordered_set capn_enum_values; - auto enumerants = enum_schema.getEnumerants(); - /// In CapnProto Enum fields are numbered sequentially starting from zero. - if (mode == FormatSettings::EnumComparingMode::BY_VALUES && enumerants.size() > max_value) + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) { - error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum"; - return false; + /// In CapnProto Enum fields are numbered sequentially starting from zero. + if (enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum"; + return false; + } + + auto values = enum_values.getSetOfAllValues(); + std::unordered_set capn_enum_values; + for (auto enumerant : enumerants) + capn_enum_values.insert(Type(enumerant.getOrdinal())); + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; } + auto names = enum_values.getSetOfAllNames(to_lower); + std::unordered_set capn_enum_names; + for (auto enumerant : enumerants) { String name = enumerant.getProto().getName(); capn_enum_names.insert(to_lower ? 
boost::algorithm::to_lower_copy(name) : name); - auto value = enumerant.getOrdinal(); - capn_enum_values.insert(Type(value)); } - if (mode == FormatSettings::EnumComparingMode::BY_NAMES || mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) - { - auto result = names == capn_enum_names; - if (!result) - error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; - return result; - } - - auto result = values == capn_enum_values; + auto result = names == capn_enum_names; if (!result) - error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; return result; } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index f6b4bb7e2e1..acaf6f28492 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -140,7 +140,7 @@ void registerFormats() registerOutputFormatMySQLWire(factory); registerOutputFormatMarkdown(factory); registerOutputFormatPostgreSQLWire(factory); - registerOutputFormatProcessorsCapnProto(factory); + registerOutputFormatCapnProto(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 8492fc9b623..4d000bb1f35 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -148,20 +148,25 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); auto enum_type = assert_cast *>(column_type.get()); DataTypePtr nested_type = std::make_shared>(); - if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_VALUES) - insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); - else if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_NAMES) - insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); - else + switch (enum_comparing_mode) { - /// Find the same enum name case insensitive. - String enum_name = enumerant.getProto().getName(); - for (auto & name : enum_type->getAllRegisteredNames()) + case FormatSettings::EnumComparingMode::BY_VALUES: + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + return; + case FormatSettings::EnumComparingMode::BY_NAMES: + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + return; + case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE: { - if (compareEnumNames(name, enum_name, enum_comparing_mode)) + /// Find the same enum name case insensitive. 
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) { - insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); - break; + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } } } } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 2e32c962177..b5e2b83c23b 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -243,9 +243,9 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) capnp::writeMessage(*output_stream, message); } -void registerOutputFormatProcessorsCapnProto(FormatFactory & factory) +void registerOutputFormatCapnProto(FormatFactory & factory) { - factory.registerOutputFormatProcessor("CapnProto", []( + factory.registerOutputFormat("CapnProto", []( WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, From 8729201208c374a27df726233e5c17515f2ffb95 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:36:57 +0300 Subject: [PATCH 173/238] Remove redundant move --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 4b9993d5a74..b9a28bd3fb3 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -422,7 +422,7 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo getCapnProtoFullTypeName(field.getType())); if (!additional_error_message.empty()) e.addMessage(additional_error_message); - throw std::move(e); + throw e; } } } From 2da8180613a106e26d091497cda1fc52d8cb905a Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:39:09 +0300 Subject: [PATCH 174/238] Add space after comma --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index b9a28bd3fb3..1dc37ff51ec 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -123,7 +123,7 @@ static String getCapnProtoFullTypeName(const capnp::Type & type) String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); /// Check if the struct contains unnamed union. 
if (!union_field_names.empty()) - type_name += "," + union_name; + type_name += ", " + union_name; type_name += ")"; return type_name; } From df81d3f74a630c3d674eb51b9116d139419f8707 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Oct 2021 14:52:24 +0300 Subject: [PATCH 175/238] Fix build in fast test --- src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index b5e2b83c23b..58f88c5c7cf 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -262,7 +262,7 @@ void registerOutputFormatCapnProto(FormatFactory & factory) namespace DB { class FormatFactory; -void registerOutputFormatProcessorsCapnProto(FormatFactory &) {} +void registerOutputFormatCapnProto(FormatFactory &) {} } #endif // USE_CAPNP From be4fc79d32cfad558202d380141a449f7a543cbf Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Oct 2021 18:29:27 +0300 Subject: [PATCH 176/238] Better handling exceptions, update tests --- programs/client/Client.cpp | 14 +++----- programs/local/LocalServer.cpp | 6 ++++ src/Client/ClientBase.cpp | 15 +++++--- .../01527_clickhouse_local_optimize.sh | 2 +- .../01528_clickhouse_local_prepare_parts.sh | 16 ++++----- .../0_stateless/01600_detach_permanently.sh | 2 +- ..._bad_options_in_client_and_local.reference | 12 +++++++ .../02096_bad_options_in_client_and_local.sh | 34 +++++++++++++++++++ ...known_option_in_clickhouse_local.reference | 2 -- ...2096_unknown_option_in_clickhouse_local.sh | 9 ----- 10 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference create mode 100755 tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh delete mode 100644 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference delete mode 100755 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a5e4bd45c7f..3c50acb1df6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -25,9 +25,6 @@ #endif #include #include -#include -#include -#include #include #include #include "Common/MemoryTracker.h" @@ -35,13 +32,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -53,16 +48,12 @@ #include #include #include -#include -#include -#include #include #include #include #include -#include #include "TestTags.h" #ifndef __clang__ @@ -1234,6 +1225,11 @@ int mainEntryClickHouseClient(int argc, char ** argv) std::cerr << DB::getExceptionMessage(e, false) << std::endl; return 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2180729438d..9e67f04699a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -652,6 +652,7 @@ void LocalServer::addOptions(OptionsDescription & options_description) ("logger.level", po::value(), "Log level") ("no-system-tables", "do not attach system tables (better startup time)") + ("path", po::value(), "Storage path") ; } @@ -713,6 +714,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) auto code = DB::getCurrentExceptionCode(); return code ? code : 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index deb22ca60ef..631d3f2bcc3 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1510,12 +1510,19 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, { cmd_settings.addProgramOptions(options_description.main_description.value()); /// Parse main commandline options. - auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()); - parser.allow_unregistered(); + auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered(); po::parsed_options parsed = parser.run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + + /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
+ unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + po::store(parsed, options); } diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index d63765fc179..c1d5c357308 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -10,6 +10,6 @@ rm -rf "${WORKING_FOLDER_01527}" mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local -${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" rm -rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 8684582ad45..95ecbf09cf5 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -36,10 +36,10 @@ ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WO EOF ## feed the table -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" --path="${WORKING_FOLDER_01528}" ## check the parts were created -${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" --path="${WORKING_FOLDER_01528}" ################# @@ -49,36 +49,36 @@ cat < "${WORKING_FOLDER_01528}/metadata/local/stdin.sql" ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); EOF -cat <&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo 
"FAIL" + +${CLICKHOUSE_CLIENT} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + + diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference deleted file mode 100644 index 2c4cf540812..00000000000 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference +++ /dev/null @@ -1,2 +0,0 @@ -Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) -Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh deleted file mode 100755 index 2fabc761d4c..00000000000 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2206 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo -${CLICKHOUSE_CLIENT} --unknown-option 2>&1 echo From 9525437499311d154198bc9b8e1e22d95986c600 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 13:17:00 +0300 Subject: [PATCH 177/238] Less threads in local, fix Ok. printing --- programs/local/LocalServer.cpp | 5 +- src/Client/LocalConnection.cpp | 17 +++--- src/Interpreters/Context.cpp | 9 ++++ src/Interpreters/Context.h | 3 ++ src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++ .../02049_clickhouse_local_merge_tree.expect | 53 +++++++++++++++++++ ...2049_clickhouse_local_merge_tree.reference | 0 7 files changed, 84 insertions(+), 12 deletions(-) create mode 100755 tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect create mode 100644 tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 30082caaac1..cdd5ae13f99 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -514,19 +514,16 @@ void LocalServer::processConfig() format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")); insert_format = "Values"; + /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); - /// Skip networking - /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); - global_context->initializeBackgroundExecutors(); - setupUsers(); /// Limit on total number of concurrently executing queries. 
diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index efd302622dd..e1324146330 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -266,19 +266,19 @@ bool LocalConnection::poll(size_t) } } - if (state->is_finished && send_progress && !state->sent_progress) - { - state->sent_progress = true; - next_packet_type = Protocol::Server::Progress; - return true; - } - if (state->is_finished) { finishQuery(); return true; } + if (send_progress && !state->sent_progress) + { + state->sent_progress = true; + next_packet_type = Protocol::Server::Progress; + return true; + } + if (state->block && state->block.value()) { next_packet_type = Protocol::Server::Data; @@ -292,7 +292,8 @@ bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block) + + if (block && !state->io.null_format) { state->block.emplace(block); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98acc786aa9..0ef92eaed39 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2895,8 +2895,15 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } +bool Context::isBackgroundExecutorsInitialized() const +{ + return is_background_executors_initialized; +} + void Context::initializeBackgroundExecutors() { + assert(!is_background_executors_initialized); + const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; /// With this executor we can execute more tasks than threads we have @@ -2943,6 +2950,8 @@ void Context::initializeBackgroundExecutors() LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size); + + is_background_executors_initialized = true; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 247dbc74f22..15c4376aa6d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -293,6 +293,8 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; + /// Has initializeBackgroundExecutors() method been executed? + bool is_background_executors_initialized = false; public: @@ -862,6 +864,7 @@ public: /// Background executors related methods void initializeBackgroundExecutors(); + bool isBackgroundExecutorsInitialized() const; MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6d38c55bd62..5b993bce724 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -833,6 +833,15 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database.empty() ? 
current_database : create.database; + auto global_context = getContext()->getGlobalContext(); + if (global_context + && global_context->getApplicationType() == Context::ApplicationType::LOCAL + && !global_context->isBackgroundExecutorsInitialized() + && create.storage && endsWith(create.storage->engine->name, "MergeTree")) + { + global_context->initializeBackgroundExecutors(); + } + // If this is a stub ATTACH query, read the query definition from the database if (create.attach && !create.storage && !create.columns_list) { diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect new file mode 100755 index 00000000000..17b98b077d5 --- /dev/null +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -0,0 +1,53 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } + +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" +expect ":) " + +send -- "drop table if exists t\r" +expect "Ok." + +send -- "create table t engine=MergeTree() order by tuple() as select 1\r" +expect "Ok." + +send -- "set optimize_on_insert = 0\r" +expect "Ok." + +send -- "drop table if exists tt\r" +expect "Ok." + +send -- "create table tt (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 2, 2), ('2020-01-01', 1, 1)\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 0, 0)\r" +expect "Ok." + +send -- "OPTIMIZE TABLE tt\r" +expect "Ok." + +send -- "select * from tt order by version format TSV\r" +expect "2020-01-01\t2\t2" + +send -- "drop table tt\r" +expect "Ok." +send -- "drop table t\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d From 8a94e26bece5c3d6b5206acda686f1b0f7de4229 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sat, 16 Oct 2021 18:51:42 +0800 Subject: [PATCH 178/238] init --- src/Functions/ReplaceRegexpImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 3e80dd5b337..1caced9cbde 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -110,7 +110,7 @@ struct ReplaceRegexpImpl res_data.resize(res_data.size() + bytes_to_copy); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; - start_pos += bytes_to_copy + match.length(); + start_pos += bytes_to_copy + (match.length() > 0 ? match.length() : 1); /// Do substitution instructions for (const auto & it : instructions) @@ -129,7 +129,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one) /// Stop after match of zero length, to avoid infinite loop. 
can_finish_current_string = true; } else From 6752be4c4ea3f8d7ee3056f6927d7f966c103de4 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sat, 16 Oct 2021 23:11:45 +0800 Subject: [PATCH 179/238] fix bug and add test --- src/Functions/ReplaceRegexpImpl.h | 16 ++++++++++++++-- .../02100_replaceRegexpAll_bug.reference | 3 +++ .../0_stateless/02100_replaceRegexpAll_bug.sql | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 1caced9cbde..7662b747feb 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -96,6 +96,9 @@ struct ReplaceRegexpImpl re2_st::StringPiece matches[max_captures]; size_t start_pos = 0; + bool is_first_match = true; + bool is_start_pos_added_one = false; + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -103,6 +106,9 @@ struct ReplaceRegexpImpl if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { + if (is_start_pos_added_one) + start_pos -= 1; + const auto & match = matches[0]; size_t bytes_to_copy = (match.data() - input.data()) - start_pos; @@ -110,7 +116,12 @@ struct ReplaceRegexpImpl res_data.resize(res_data.size() + bytes_to_copy); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; - start_pos += bytes_to_copy + (match.length() > 0 ? match.length() : 1); + start_pos += bytes_to_copy + match.length(); + if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) + { + start_pos += 1; + is_start_pos_added_one = true; + } /// Do substitution instructions for (const auto & it : instructions) @@ -129,8 +140,9 @@ struct ReplaceRegexpImpl } } - if (replace_one) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one || !is_first_match) /// Stop after match of zero length, to avoid infinite loop. 
can_finish_current_string = true; + is_first_match = false; } else can_finish_current_string = true; diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference new file mode 100644 index 00000000000..2bb40778ca6 --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -0,0 +1,3 @@ +aaaabb +b aaaa +aaaa diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql new file mode 100644 index 00000000000..d0caeacfa0e --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -0,0 +1,3 @@ +SELECT trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT trim(both 'b ' FROM 'b aaaabb ') x; From 1541593b2f6cb6e991af22b5a1885f6848f908b1 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sat, 16 Oct 2021 23:28:29 +0800 Subject: [PATCH 180/238] add notes --- src/Functions/ReplaceRegexpImpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 7662b747feb..a297be42aaf 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -117,6 +117,8 @@ struct ReplaceRegexpImpl memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); + + /// To avoid infinite loop. if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) { start_pos += 1; @@ -140,7 +142,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || !is_first_match) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one || !is_first_match) can_finish_current_string = true; is_first_match = false; } From bd2c016cf32eb0ca9649c289baee07032294e480 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sun, 17 Oct 2021 00:33:57 +0800 Subject: [PATCH 181/238] fix bug --- src/Functions/ReplaceRegexpImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index a297be42aaf..b2c5470cd96 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -142,7 +142,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || !is_first_match) + if (replace_one || (!is_first_match && match.length() == 0)) can_finish_current_string = true; is_first_match = false; } From d34d752688ec706694a05c5fd0c568c651b57c14 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 18:37:46 +0000 Subject: [PATCH 182/238] Fix tests --- programs/local/LocalServer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cdd5ae13f99..0c5f64ea913 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -579,6 +579,11 @@ void LocalServer::processConfig() { String path = global_context->getPath(); + /// When tables are loaded from .sql we initialize background executors + /// regardless there are MergeTree tables or not, because no better place was found. + /// In other cases it will be initialized only when there are mergeTree tables. 
+ global_context->initializeBackgroundExecutors(); + /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); From b034c913db468b9db95d47ea8882172e54763541 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 15 Oct 2021 14:16:41 +0800 Subject: [PATCH 183/238] update --- contrib/replxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/replxx b/contrib/replxx index 89abeea7516..b0c266c2d8a 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 89abeea7516a2a9b6aad7bfecc132f608ff14a3d +Subproject commit b0c266c2d8a835784181e17292b421848c78c6b8 From 131aa7701738cfc057e6f452afb79a424aae7f81 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sun, 17 Oct 2021 11:21:58 +0800 Subject: [PATCH 184/238] fix style --- src/Functions/ReplaceRegexpImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index b2c5470cd96..678189f8558 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -98,7 +98,7 @@ struct ReplaceRegexpImpl size_t start_pos = 0; bool is_first_match = true; bool is_start_pos_added_one = false; - + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -117,7 +117,7 @@ struct ReplaceRegexpImpl memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); - + /// To avoid infinite loop. if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) { From c2faf450d129ac9a81337b96fbb819ef22edf1e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 06:37:51 +0300 Subject: [PATCH 185/238] Fix error --- src/IO/ReadHelpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e4452a7af0a..fda8c213ebf 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -281,7 +281,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { /// 123+ or +123+, just stop after 123 or +123. if (has_number) - return ReturnType(true); + goto end; /// No digits read yet, but we already read sign, like ++, -+. 
if (has_sign) @@ -300,7 +300,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) case '-': { if (has_number) - return ReturnType(true); + goto end; if (has_sign) { From 059fc1de6997ca36e8b7b009ff31a175282bd02a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 17 Oct 2021 11:42:36 +0300 Subject: [PATCH 186/238] Allow symlinks in file storage --- src/Common/filesystemHelpers.cpp | 7 +++-- src/Common/filesystemHelpers.h | 5 ++-- src/Dictionaries/FileDictionarySource.cpp | 2 +- src/Dictionaries/LibraryDictionarySource.cpp | 8 +---- src/Storages/StorageFile.cpp | 10 +++++-- .../02051_symlinks_to_user_files.reference | 1 + .../02051_symlinks_to_user_files.sh | 30 +++++++++++++++++++ 7 files changed, 47 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/02051_symlinks_to_user_files.reference create mode 100755 tests/queries/0_stateless/02051_symlinks_to_user_files.sh diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 89214ad496e..f9fe8c97a14 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return absolute_path.starts_with(absolute_prefix_path); } -bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { /// Differs from pathStartsWith in how `path` is normalized before comparison. /// Make `path` absolute if it was relative and put it into normalized form: remove @@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } -bool symlinkStartsWith(const String & path, const String & prefix_path) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); auto filesystem_prefix_path = std::filesystem::path(prefix_path); - return symlinkStartsWith(filesystem_path, filesystem_prefix_path); + return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path); } + } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index de5802cde6d..f626198920e 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,8 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Returns true if symlink starts with prefix path -bool symlinkStartsWith(const String & path, const String & prefix_path); +/// Same as pathStartsWith, but wihtout canonization, i.e. allowed to check symlinks. 
+/// (Path is made absolute and normalized) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 8c1f099f344..7fd2dbf80f1 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -31,7 +31,7 @@ FileDictionarySource::FileDictionarySource( , context(context_) { auto user_files_path = context->getUserFilesPath(); - if (created_from_ddl && !pathStartsWith(filepath, user_files_path)) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(filepath, user_files_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index f117cfb179e..42683fb884c 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -41,13 +41,7 @@ LibraryDictionarySource::LibraryDictionarySource( , context(Context::createCopy(context_)) { auto dictionaries_lib_path = context->getDictionariesLibPath(); - bool path_checked = false; - if (fs::is_symlink(path)) - path_checked = symlinkStartsWith(path, dictionaries_lib_path); - else - path_checked = pathStartsWith(path, dictionaries_lib_path); - - if (created_from_ddl && !path_checked) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path); if (!fs::exists(path)) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 4ae55272db6..24377017987 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -124,8 +125,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di return; /// "/dev/null" is allowed for perf testing - if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!fileOrSymlinkPathStartsWith(table_path, db_dir_path) && table_path != "/dev/null") + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path); if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); @@ -140,7 +141,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; - const String path = fs::weakly_canonical(fs_table_path); + /// Do not use fs::canonical or fs::weakly_canonical. + /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. + String path = std::filesystem::absolute(fs_table_path); + path = fs::path(path).lexically_normal(); /// Normalize path. 
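+    /// (Editor's illustration, hedged — assuming user_files contains a symlink
+    /// "link.csv" -> "/data/real.csv":
+    ///     fs::weakly_canonical(p)            -> "/data/real.csv"      resolves the symlink,
+    ///                                           so the user_files prefix check would fail;
+    ///     fs::absolute(p).lexically_normal() -> ".../user_files/link.csv"  keeps the symlink.)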
if (path.find_first_of("*?{") == std::string::npos) { std::error_code error; diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh new file mode 100755 index 00000000000..3a9882a441c --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p "${user_files_path}/" +chmod 777 "${user_files_path}" + +export FILE="test_symlink_${CLICKHOUSE_DATABASE}" + +symlink_path=${user_files_path}/${FILE} +file_path=$CUR_DIR/${FILE} + +function cleanup() +{ + rm ${symlink_path} ${file_path} +} +trap cleanup EXIT + +touch ${file_path} +ln -s ${file_path} ${symlink_path} + +${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; + From cd22ca616752c07f1808eeb676c9747693e88a1e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 13:13:47 +0300 Subject: [PATCH 187/238] Update filesystemHelpers.h --- src/Common/filesystemHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index f626198920e..2b805ce0c68 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,7 +35,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Same as pathStartsWith, but wihtout canonization, i.e. allowed to check symlinks. +/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. /// (Path is made absolute and normalized) bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); From 07b44713b63b0e5292987f4937435830076dd203 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 13:45:32 +0300 Subject: [PATCH 188/238] Ping CI --- src/Common/filesystemHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 2b805ce0c68..fc3a4f15573 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -36,7 +36,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p bool pathStartsWith(const String & path, const String & prefix_path); /// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. -/// (Path is made absolute and normalized) +/// (Path is made absolute and normalized.) 
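+/// (Hedged usage sketch, not part of this patch: with prefix "/var/lib/clickhouse/user_files",
+/// fileOrSymlinkPathStartsWith(prefix + "/link.csv", prefix) should hold even when link.csv
+/// is a symlink pointing outside the prefix, because the symlink is never resolved.)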
bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } From 5633865df159132e65242d9ce09f9f0206174ed8 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 14:16:40 +0300 Subject: [PATCH 189/238] Update src/Storages/StorageFile.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 24377017987..bdc0c203d59 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -143,7 +143,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user Strings paths; /// Do not use fs::canonical or fs::weakly_canonical. /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. - String path = std::filesystem::absolute(fs_table_path); + String path = fs::absolute(fs_table_path); path = fs::path(path).lexically_normal(); /// Normalize path. if (path.find_first_of("*?{") == std::string::npos) { From 5dab184d8b0c2ef1e59d5e194b21316750598e04 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 18:21:43 +0300 Subject: [PATCH 190/238] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 3a9882a441c..7d1fffba74d 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -16,6 +16,8 @@ export FILE="test_symlink_${CLICKHOUSE_DATABASE}" symlink_path=${user_files_path}/${FILE} file_path=$CUR_DIR/${FILE} +chmod +w ${file_path} + function cleanup() { rm ${symlink_path} ${file_path} From 301caa80918f36ce32139d8e6554e314ba494183 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 20:52:21 +0300 Subject: [PATCH 191/238] Update test --- .../0_stateless/01425_decimal_parse_big_negative_exponent.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql index 7f276d1f8d4..1387206b882 100644 --- a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql +++ b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql @@ -1,4 +1,4 @@ -SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 6 } +SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '1E-9' AS x, toDecimal32(x, 0); SELECT '1E-8' AS x, toDecimal32(x, 0); From a8a7ba90056d09dfaa7ab717a992f0535fcddc00 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:09:17 +0300 Subject: [PATCH 192/238] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 7d1fffba74d..53c50542b06 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ 
b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -16,7 +16,9 @@ export FILE="test_symlink_${CLICKHOUSE_DATABASE}" symlink_path=${user_files_path}/${FILE} file_path=$CUR_DIR/${FILE} +touch ${file_path} chmod +w ${file_path} +ln -s ${file_path} ${symlink_path} function cleanup() { @@ -24,9 +26,6 @@ function cleanup() } trap cleanup EXIT -touch ${file_path} -ln -s ${file_path} ${symlink_path} - ${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; ${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; From 9de534f96c751d524f96961d620ae043618e3cdc Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 17 Oct 2021 23:10:01 +0300 Subject: [PATCH 193/238] Revert special contribs and set folder manually for them until fixed --- cmake/find/cxx.cmake | 2 ++ cmake/find/unwind.cmake | 1 + contrib/CMakeLists.txt | 9 --------- contrib/libcxx-cmake/CMakeLists.txt | 1 + contrib/libcxxabi-cmake/CMakeLists.txt | 1 + contrib/libunwind-cmake/CMakeLists.txt | 1 + 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b96ba1e1b65..b1da125e219 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -50,6 +50,8 @@ endif () if (NOT HAVE_LIBCXX AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) set (LIBCXX_LIBRARY cxx) set (LIBCXXABI_LIBRARY cxxabi) + add_subdirectory(contrib/libcxxabi-cmake) + add_subdirectory(contrib/libcxx-cmake) # Exception handling library is embedded into libcxxabi. diff --git a/cmake/find/unwind.cmake b/cmake/find/unwind.cmake index 9ae23ae23c7..c9f5f30a5d6 100644 --- a/cmake/find/unwind.cmake +++ b/cmake/find/unwind.cmake @@ -1,6 +1,7 @@ option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) if (USE_UNWIND) + add_subdirectory(contrib/libunwind-cmake) set (UNWIND_LIBRARIES unwind) set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1be61db40db..676654452de 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -21,15 +21,6 @@ endif() set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) -if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) - add_subdirectory(libcxxabi-cmake) - add_subdirectory(libcxx-cmake) -endif () - -if (USE_UNWIND) - add_subdirectory(libunwind-cmake) -endif () - add_subdirectory (abseil-cpp-cmake) add_subdirectory (magic-enum-cmake) add_subdirectory (boost-cmake) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index ac67f2563a3..2ec6dbff1a1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -47,6 +47,7 @@ set(SRCS ) add_library(cxx ${SRCS}) +set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0bb5d663633..425111d9b26 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ set(SRCS ) add_library(cxxabi ${SRCS}) +set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake") # Third party 
library may have substandard code. target_compile_options(cxxabi PRIVATE -w) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 1a9f5e50abd..155853a0bca 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -39,6 +39,7 @@ set(LIBUNWIND_SOURCES ${LIBUNWIND_ASM_SOURCES}) add_library(unwind ${LIBUNWIND_SOURCES}) +set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) From ab9d5d8cc789438ab0b01f6b0a4d712e190fed6f Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 18 Oct 2021 06:06:38 +0000 Subject: [PATCH 194/238] Better --- programs/local/LocalServer.cpp | 5 ----- src/Databases/DatabaseOnDisk.cpp | 9 +++++++++ src/Interpreters/loadMetadata.cpp | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 0c5f64ea913..cdd5ae13f99 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -579,11 +579,6 @@ void LocalServer::processConfig() { String path = global_context->getPath(); - /// When tables are loaded from .sql we initialize background executors - /// regardless there are MergeTree tables or not, because no better place was found. - /// In other cases it will be initialized only when there are mergeTree tables. - global_context->initializeBackgroundExecutors(); - /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 97e59f53f64..a71d539e3c5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -53,6 +53,15 @@ std::pair createTableFromAST( ast_create_query.attach = true; ast_create_query.database = database_name; + auto global_context = context->getGlobalContext(); + if (global_context + && global_context->getApplicationType() == Context::ApplicationType::LOCAL + && !global_context->isBackgroundExecutorsInitialized() + && ast_create_query.storage && endsWith(ast_create_query.storage->engine->name, "MergeTree")) + { + global_context->initializeBackgroundExecutors(); + } + if (ast_create_query.as_table_function) { const auto & factory = TableFunctionFactory::instance(); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 6a3db48e835..65b2065b2ad 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -161,7 +161,7 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam bool create_default_db_if_not_exists = !default_database_name.empty(); bool metadata_dir_for_default_db_already_exists = databases.count(default_database_name); if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists) - databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name)); + databases.emplace(default_database_name, std::filesystem::path(path) / escapeForFileName(default_database_name)); TablesLoader::Databases loaded_databases; for (const auto & [name, db_path] : databases) From ec6b7785015c45a69a9c4224413a19df0a0fe412 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 18 Oct 2021 10:29:57 +0300 Subject: [PATCH 195/238] Update 02051_symlinks_to_user_files.sh --- 
.../0_stateless/02051_symlinks_to_user_files.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 53c50542b06..fe3073f9ff2 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -8,17 +8,18 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # See 01658_read_file_to_string_column.sh user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p "${user_files_path}/" -chmod 777 "${user_files_path}" +FILE_PATH="${user_files_path}/file/" +mkdir -p ${FILE_PATH} +chmod 777 ${FILE_PATH} -export FILE="test_symlink_${CLICKHOUSE_DATABASE}" +FILE="test_symlink_${CLICKHOUSE_DATABASE}" -symlink_path=${user_files_path}/${FILE} +symlink_path=${FILE_PATH}/${FILE} file_path=$CUR_DIR/${FILE} touch ${file_path} -chmod +w ${file_path} ln -s ${file_path} ${symlink_path} +chmod +w ${symlink_path} function cleanup() { From a28b048415bb330d5427a25d982378d425eba57e Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 18 Oct 2021 14:22:27 +0300 Subject: [PATCH 196/238] Fix test --- .../0_stateless/01339_client_unrecognized_option.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index f88d890f33c..00c153ec915 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -5,14 +5,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' From 6350957709831641b3b7c43d97a641ca86677d50 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 18 Oct 2021 14:30:02 +0300 Subject: [PATCH 197/238] Fix special build --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 1dc37ff51ec..ecfa5df8351 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -422,7 +422,7 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo getCapnProtoFullTypeName(field.getType())); if (!additional_error_message.empty()) e.addMessage(additional_error_message); - throw e; + throw std::move(e); } } } From b7a53df9de5b8ec3b49fa6a5ee897b12c4757835 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 18 Oct 
2021 14:53:42 +0000 Subject: [PATCH 198/238] Send columns description in clickhouse-local --- src/Client/LocalConnection.cpp | 37 +++++++++++++++++-- src/Client/LocalConnection.h | 2 + ...khouse_local_columns_description.reference | 1 + ...00_clickhouse_local_columns_description.sh | 7 ++++ 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference create mode 100755 tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 4455ba3b9ad..20f28bb5337 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -5,7 +5,7 @@ #include #include #include -#include "Core/Protocol.h" +#include namespace DB @@ -105,6 +105,16 @@ void LocalConnection::sendQuery( state->pushing_executor->start(); state->block = state->pushing_executor->getHeader(); } + + const auto & table_id = query_context->getInsertionTable(); + if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) + { + if (!table_id.empty()) + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, query_context); + state->columns_description = storage_ptr->getInMemoryMetadataPtr()->getColumns(); + } + } } else if (state->io.pipeline.pulling()) { @@ -117,7 +127,9 @@ void LocalConnection::sendQuery( executor.execute(); } - if (state->block) + if (state->columns_description) + next_packet_type = Protocol::Server::TableColumns; + else if (state->block) next_packet_type = Protocol::Server::Data; } catch (const Exception & e) @@ -337,21 +349,41 @@ Packet LocalConnection::receivePacket() packet.block = std::move(state->block.value()); state->block.reset(); } + next_packet_type.reset(); + break; + } + case Protocol::Server::TableColumns: + { + if (state->columns_description) + { + /// Send external table name (empty name is the main table) + /// (see TCPHandler::sendTableColumns) + packet.multistring_message = {"", state->columns_description->toString()}; + } + + if (state->block) + { + next_packet_type = Protocol::Server::Data; + } + break; } case Protocol::Server::Exception: { packet.exception = std::make_unique(*state->exception); + next_packet_type.reset(); break; } case Protocol::Server::Progress: { packet.progress = std::move(state->progress); state->progress.reset(); + next_packet_type.reset(); break; } case Protocol::Server::EndOfStream: { + next_packet_type.reset(); break; } default: @@ -359,7 +391,6 @@ Packet LocalConnection::receivePacket() "Unknown packet {} for {}", toString(packet.type), getDescription()); } - next_packet_type.reset(); return packet; } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index b596360db64..1cc23defa6e 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -33,6 +34,7 @@ struct LocalQueryState /// Current block to be sent next. 
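    /// (Editor's sketch of the resulting flow, hedged: when columns_description is set,
    /// receivePacket() first answers with Protocol::Server::TableColumns and only then
    /// hands out the buffered block as Protocol::Server::Data.)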
std::optional block; + std::optional columns_description; /// Is request cancelled bool is_cancelled = false; diff --git a/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference new file mode 100644 index 00000000000..0e291f3ac0d --- /dev/null +++ b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference @@ -0,0 +1 @@ +1 42 diff --git a/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh new file mode 100755 index 00000000000..f88a8de80f5 --- /dev/null +++ b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "create table t (n int, m int default 42) engine=Memory;insert into t values (1, NULL);select * from t" From 3ffca6e138c7e23faf8b23942ff8f0a22147e75f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 18 Oct 2021 21:04:40 +0300 Subject: [PATCH 199/238] ExecutablePool dictionary source fix borrow timeout milliseconds --- src/Dictionaries/ExecutablePoolDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 8d1122b1194..dce2ce94b93 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -100,7 +100,7 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout }; auto shell_command = ShellCommand::execute(config); return shell_command; - }, configuration.max_command_execution_time * 1000); + }, configuration.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, From 4e3910e564efcfd308fb3bccb271a7f36aa3386b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 200/238] Add a test for adjusting last granula with max_read_buffer_size=0 --- ...ast_granula_adjust_LOGICAL_ERROR.reference | 8 ++++++++ ...2_last_granula_adjust_LOGICAL_ERROR.sql.j2 | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference create mode 100644 tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference new file mode 100644 index 00000000000..d7d3ee8f362 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference @@ -0,0 +1,8 @@ +1 +1 +10 +10 +100 +100 +10000 +10000 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 new file mode 100644 index 00000000000..465aa22beb3 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 @@ -0,0 +1,19 @@ +-- Tags: long + +{% for rows_in_table in [1, 10, 100, 10000] %} +{% for wide in [0, 100000000] %} +drop table if exists data_02052_{{ 
rows_in_table }}_wide{{ wide }}; +create table data_02052_{{ rows_in_table }}_wide{{ wide }} (key Int, value String) +engine=MergeTree() +order by key +settings + min_bytes_for_wide_part={{ wide }} +as select number, repeat(toString(number), 5) from numbers({{ rows_in_table }}); + +-- avoid any optimizations with ignore(*) +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=1, max_threads=1; +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=0, max_threads=1; -- { serverError CANNOT_READ_ALL_DATA } + +drop table data_02052_{{ rows_in_table }}_wide{{ wide }}; +{% endfor %} +{% endfor %} From 41e6df0f4b410a83b386d50dd11c4339334bddbf Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 17 Oct 2021 00:27:28 +0300 Subject: [PATCH 201/238] Remove unused offset_columns from MergeTreeReaderWide::readRows() --- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 206469da7be..29cc45a5c60 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -69,10 +69,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si size_t num_columns = columns.size(); checkNumberOfColumns(num_columns); - /// Pointers to offset columns that are common to the nested data structure columns. - /// If append is true, then the value will be equal to nullptr and will be used only to - /// check that the offsets column has been already read. - OffsetColumns offset_columns; std::unordered_map caches; std::unordered_set prefetched_streams; From cd4b33c8c9066f58594e21a605ce27d093026127 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 202/238] Verify that all rows was read in MergeTreeReaderCompact v0: Use fmt-like style exception in MergeTreeReaderCompact v2: Update the check --- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 9b879283c10..15c5795ee7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -160,9 +160,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; - if (read_rows_in_column < rows_to_read) - throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) + - ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA); + if (read_rows_in_column != rows_to_read) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. 
Rows expected: {}.", + read_rows_in_column, rows_to_read); } catch (Exception & e) { From e576fd17bd806666886cd8da7a9466d4268b88e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 203/238] Do not allow reading to empty buffer in MergeTreeReaderStream Changelog: - Verify that all rows was read in MergeTreeReaderWide - Ignore some exceptions for Wide parts - Take max_rows_to_read into account - Verify that requested rows is not zero (otherwise it is too tricky) - Simply verify that buffer is not empty --- src/Storages/MergeTree/MergeTreeReaderStream.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index fc57b48e86d..f225ecae8fa 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_READ_ALL_DATA; } @@ -76,6 +77,10 @@ MergeTreeReaderStream::MergeTreeReaderStream( if (max_mark_range_bytes != 0) read_settings = read_settings.adjustBufferSize(max_mark_range_bytes); + //// Empty buffer does not makes progress. + if (!read_settings.local_fs_buffer_size || !read_settings.remote_fs_buffer_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); + /// Initialize the objects that shall be used to perform read operations. if (uncompressed_cache) { From f7b76373ceecc024eb93f3cbf5198b0022acdcc9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 13 Oct 2021 23:52:39 +0300 Subject: [PATCH 204/238] Move ProfileEvents packet type from TCPHandler into ProfileEventsExt --- src/Interpreters/ProfileEventsExt.cpp | 5 +++++ src/Interpreters/ProfileEventsExt.h | 10 ++++++++++ src/Server/TCPHandler.cpp | 20 ++++---------------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 4386c294316..472efc109fb 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -11,6 +11,11 @@ namespace ProfileEvents { +std::shared_ptr TypeEnum = std::make_shared(DB::DataTypeEnum8::Values{ + { "increment", static_cast(INCREMENT)}, + { "gauge", static_cast(GAUGE)}, +}); + /// Put implementation here to avoid extra linking dependencies for clickhouse_common_io void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only) { diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 699c997d904..8a92eadec79 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include @@ -9,4 +10,13 @@ namespace ProfileEvents /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +/// This is for ProfileEvents packets. 
+enum Type : int8_t
+{
+    INCREMENT = 1,
+    GAUGE = 2,
+};
+
+extern std::shared_ptr<DB::DataTypeEnum8> TypeEnum;
+
 }
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 87cc76b1812..729cb33371a 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include <Interpreters/ProfileEventsExt.h>
 #include 
 #include 
 #include 
@@ -831,12 +832,6 @@ namespace
 {
     using namespace ProfileEvents;

-    enum ProfileEventTypes : int8_t
-    {
-        INCREMENT = 1,
-        GAUGE = 2,
-    };
-
     constexpr size_t NAME_COLUMN_INDEX = 4;
     constexpr size_t VALUE_COLUMN_INDEX = 5;
@@ -879,7 +874,7 @@ namespace
             columns[i++]->insertData(host_name.data(), host_name.size());
             columns[i++]->insert(UInt64(snapshot.current_time));
             columns[i++]->insert(UInt64{snapshot.thread_id});
-            columns[i++]->insert(ProfileEventTypes::INCREMENT);
+            columns[i++]->insert(ProfileEvents::Type::INCREMENT);
         }
     }
@@ -893,7 +888,7 @@ namespace
         columns[i++]->insertData(host_name.data(), host_name.size());
         columns[i++]->insert(UInt64(snapshot.current_time));
         columns[i++]->insert(UInt64{snapshot.thread_id});
-        columns[i++]->insert(ProfileEventTypes::GAUGE);
+        columns[i++]->insert(ProfileEvents::Type::GAUGE);

         columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME));
         columns[i++]->insert(snapshot.memory_usage);
@@ -907,18 +902,11 @@ void TCPHandler::sendProfileEvents()
     if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS)
         return;

-    auto profile_event_type = std::make_shared<DataTypeEnum8>(
-        DataTypeEnum8::Values
-        {
-            { "increment", static_cast<Int8>(INCREMENT)},
-            { "gauge", static_cast<Int8>(GAUGE)},
-        });
-
     NamesAndTypesList column_names_and_types = {
         { "host_name", std::make_shared<DataTypeString>() },
         { "current_time", std::make_shared<DataTypeDateTime>() },
         { "thread_id", std::make_shared<DataTypeUInt64>() },
-        { "type", profile_event_type },
+        { "type", ProfileEvents::TypeEnum },
         { "name", std::make_shared<DataTypeString>() },
         { "value", std::make_shared<DataTypeUInt64>() },
     };

From 424bf6fcf49ce377a26270f9872e0939912dac79 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 12 Oct 2021 21:03:54 +0300
Subject: [PATCH 205/238] client: add ability to print raw profile events

This can be useful for debugging and for testing (since you will not
need to obtain query_id and look at query_log).

v2:
- mark test as long
- add option to docs
- add type of profile event into logs too
v3:
- resolve conflicts
- and fix onProfileEvents callback
v4:
- add --print-profile-events separate switch
---
 docs/en/interfaces/cli.md                     |  2 +
 src/Client/ClientBase.cpp                     | 88 ++++++++++++++-----
 src/Client/ClientBase.h                       | 11 +++
 src/Client/InternalTextLogs.cpp               | 68 +++++++++++++-
 src/Client/InternalTextLogs.h                 | 29 +++++-
 .../02050_client_profile_events.reference     |  4 +
 .../02050_client_profile_events.sh            | 15 ++++
 7 files changed, 188 insertions(+), 29 deletions(-)
 create mode 100644 tests/queries/0_stateless/02050_client_profile_events.reference
 create mode 100755 tests/queries/0_stateless/02050_client_profile_events.sh

diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index c4305d229cf..eaf7a96ce42 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -128,6 +128,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
 - `--history_file` — Path to a file containing command history.
 - `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
 - `--hardware-utilization` — Print hardware utilization information in progress bar.
+- `--print-profile-events` – Print `ProfileEvents` packets.
+- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).

 Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 7bcff4f5ef7..54e679e4c0f 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -266,7 +266,7 @@ void ClientBase::onLogData(Block & block)
 {
     initLogsOutputStream();
     progress_indication.clearProgressOutput();
-    logs_out_stream->write(block);
+    logs_out_stream->writeLogs(block);
     logs_out_stream->flush();
 }

@@ -668,39 +668,61 @@ void ClientBase::onEndOfStream()
 void ClientBase::onProfileEvents(Block & block)
 {
     const auto rows = block.rows();
-    if (rows == 0 || !progress_indication.print_hardware_utilization)
+    if (rows == 0)
         return;
-    const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
-    const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
-    const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
-    const auto & array_values = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();
-    const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds);
-    const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds);
-
-    HostToThreadTimesMap thread_times;
-    for (size_t i = 0; i < rows; ++i)
+
+    if (progress_indication.print_hardware_utilization)
     {
-        auto thread_id = array_thread_id[i];
-        auto host_name = host_names.getDataAt(i).toString();
-        if (thread_id != 0)
-            progress_indication.addThreadIdToList(host_name, thread_id);
-        auto event_name = names.getDataAt(i);
-        auto value = array_values[i];
-        if (event_name == user_time_name)
+        const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
+        const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
+        const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
+        const auto & array_values = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();
+
+        const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds);
+        const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds);
+
+        HostToThreadTimesMap thread_times;
+        for (size_t i = 0; i < rows; ++i)
         {
-            thread_times[host_name][thread_id].user_ms = value;
+            auto thread_id = array_thread_id[i];
+            auto host_name = host_names.getDataAt(i).toString();
+            if (thread_id != 0)
+                progress_indication.addThreadIdToList(host_name, thread_id);
+            auto event_name = names.getDataAt(i);
+            auto value = array_values[i];
+            if (event_name == user_time_name)
+            {
+                thread_times[host_name][thread_id].user_ms = value;
+            }
+            else if (event_name == system_time_name)
+            {
+                thread_times[host_name][thread_id].system_ms = value;
+            }
+            else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
+            {
+                thread_times[host_name][thread_id].memory_usage = value;
+            }
         }
-        else if (event_name == system_time_name)
+        progress_indication.updateThreadEventData(thread_times);
+    }
+
+    if (profile_events.print)
+    {
+        if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
         {
-            thread_times[host_name][thread_id].system_ms = value;
+            initLogsOutputStream();
+            progress_indication.clearProgressOutput();
+            logs_out_stream->writeProfileEvents(block);
+            logs_out_stream->flush();
+
+            profile_events.watch.restart();
+            profile_events.last_block = {};
         }
-        else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
+        else
         {
-            thread_times[host_name][thread_id].memory_usage = value;
+            profile_events.last_block = block;
         }
     }
-    progress_indication.updateThreadEventData(thread_times);
 }


@@ -1023,6 +1045,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
     processed_rows = 0;
     written_first_block = false;
     progress_indication.resetProgress();
+    profile_events.watch.restart();

     {
         /// Temporarily apply query settings to context.
@@ -1091,6 +1114,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
         }
     }

+    /// Always print last block (if it was not printed already)
+    if (profile_events.last_block)
+    {
+        initLogsOutputStream();
+        progress_indication.clearProgressOutput();
+        logs_out_stream->writeProfileEvents(profile_events.last_block);
+        logs_out_stream->flush();
+    }
+
     if (is_interactive)
     {
         std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
@@ -1561,6 +1593,8 @@ void ClientBase::init(int argc, char ** argv)
         ("ignore-error", "do not stop processing in multiquery mode")
         ("stacktrace", "print stack traces of exceptions")
         ("hardware-utilization", "print hardware utilization information in progress bar")
+        ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets")
+        ("profile-events-delay-ms", po::value<UInt64>()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)")
     ;

     addAndCheckOptions(options_description, options, common_arguments);
@@ -1611,6 +1645,10 @@ void ClientBase::init(int argc, char ** argv)
         config().setBool("vertical", true);
     if (options.count("stacktrace"))
         config().setBool("stacktrace", true);
+    if (options.count("print-profile-events"))
+        config().setBool("print-profile-events", true);
+    if (options.count("profile-events-delay-ms"))
+        config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
     if (options.count("progress"))
         config().setBool("progress", true);
     if (options.count("echo"))
@@ -1631,6 +1669,8 @@ void ClientBase::init(int argc, char ** argv)
         progress_indication.print_hardware_utilization = true;

     query_processing_stage = QueryProcessingStage::fromString(options["stage"].as<std::string>());
+    profile_events.print = options.count("print-profile-events");
+    profile_events.delay_ms = options["profile-events-delay-ms"].as<UInt64>();

     processOptions(options_description, options, external_tables_arguments);
     argsToConfig(common_arguments, config(), 100);
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index fce706d7cf8..b31eff82b30 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -3,6 +3,7 @@
 #include 
 #include 
 #include 
+#include <Common/Stopwatch.h>
 #include 
 #include 
 #include 
@@ -217,6 +218,16 @@ protected:
     QueryFuzzer fuzzer;
     int query_fuzzer_runs = 0;

+    struct
+    {
+        bool print = false;
+        /// UINT64_MAX -- print only last
+        UInt64 delay_ms = 0;
+        Stopwatch watch;
+        /// For printing only last (delay_ms == 0).
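+        /// (Hedged editor's sketch: onProfileEvents() stores the most recent packet here
+        /// whenever the delay has not yet elapsed, and processParsedSingleQuery() flushes
+        /// it after the query so the final counters are still printed.)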
+        Block last_block;
+    } profile_events;
+
     QueryProcessingStage::Enum query_processing_stage;
 };

diff --git a/src/Client/InternalTextLogs.cpp b/src/Client/InternalTextLogs.cpp
index 65592fee670..430ba6daf0a 100644
--- a/src/Client/InternalTextLogs.cpp
+++ b/src/Client/InternalTextLogs.cpp
@@ -1,6 +1,7 @@
 #include 
 #include 
 #include 
+#include <Interpreters/ProfileEventsExt.h>
 #include 
 #include 
 #include 
@@ -13,7 +14,7 @@
 namespace DB
 {

-void InternalTextLogs::write(const Block & block)
+void InternalTextLogs::writeLogs(const Block & block)
 {
     const auto & array_event_time = typeid_cast<const ColumnUInt32 &>(*block.getByName("event_time").column).getData();
     const auto & array_microseconds = typeid_cast<const ColumnUInt32 &>(*block.getByName("event_time_microseconds").column).getData();
@@ -97,4 +98,69 @@ void InternalTextLogs::write(const Block & block)
     }
 }

+void InternalTextLogs::writeProfileEvents(const Block & block)
+{
+    const auto & column_host_name = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
+    const auto & array_current_time = typeid_cast<const ColumnUInt32 &>(*block.getByName("current_time").column).getData();
+    const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
+    const auto & array_type = typeid_cast<const ColumnInt8 &>(*block.getByName("type").column).getData();
+    const auto & column_name = typeid_cast<const ColumnString &>(*block.getByName("name").column);
+    const auto & array_value = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();
+
+    for (size_t row_num = 0; row_num < block.rows(); ++row_num)
+    {
+        /// host_name
+        auto host_name = column_host_name.getDataAt(row_num);
+        if (host_name.size)
+        {
+            writeCString("[", wb);
+            if (color)
+                writeString(setColor(StringRefHash()(host_name)), wb);
+            writeString(host_name, wb);
+            if (color)
+                writeCString(resetColor(), wb);
+            writeCString("] ", wb);
+        }
+
+        /// current_time
+        auto current_time = array_current_time[row_num];
+        writeDateTimeText<'.', ':'>(current_time, wb);
+
+        /// thread_id
+        UInt64 thread_id = array_thread_id[row_num];
+        writeCString(" [ ", wb);
+        if (color)
+            writeString(setColor(intHash64(thread_id)), wb);
+        writeIntText(thread_id, wb);
+        if (color)
+            writeCString(resetColor(), wb);
+        writeCString(" ] ", wb);
+
+        /// name
+        auto name = column_name.getDataAt(row_num);
+        if (color)
+            writeString(setColor(StringRefHash()(name)), wb);
+        DB::writeString(name, wb);
+        if (color)
+            writeCString(resetColor(), wb);
+        writeCString(": ", wb);
+
+        /// value
+        UInt64 value = array_value[row_num];
+        writeIntText(value, wb);
+
+        /// type
+        Int8 type = array_type[row_num];
+        writeCString(" (", wb);
+        if (color)
+            writeString(setColor(intHash64(type)), wb);
+        writeString(toString(ProfileEvents::TypeEnum->castToName(type)), wb);
+        if (color)
+            writeCString(resetColor(), wb);
+        writeCString(")", wb);
+
+        writeChar('\n', wb);
+    }
+}
+
 }
diff --git a/src/Client/InternalTextLogs.h b/src/Client/InternalTextLogs.h
index a8b119b0f69..0690211fd24 100644
--- a/src/Client/InternalTextLogs.h
+++ b/src/Client/InternalTextLogs.h
@@ -6,16 +6,37 @@
 namespace DB
 {

-/// Prints internal server logs
-/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock()
+/// Prints internal server logs or profile events with colored output (if requested).
/// NOTE: IRowOutputFormat does not suite well for this case class InternalTextLogs { public: InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {} - - void write(const Block & block); + /// Print internal server logs + /// + /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock(): + /// - event_time + /// - event_time_microseconds + /// - host_name + /// - query_id + /// - thread_id + /// - priority + /// - source + /// - text + void writeLogs(const Block & block); + /// Print profile events. + /// + /// Block: + /// - host_name + /// - current_time + /// - thread_id + /// - type + /// - name + /// - value + /// + /// See also TCPHandler::sendProfileEvents() for block columns. + void writeProfileEvents(const Block & block); void flush() { diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference new file mode 100644 index 00000000000..00fc3b5d06a --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -0,0 +1,4 @@ +0 +SelectedRows: 131010 (increment) +OK +OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh new file mode 100755 index 00000000000..5c3887cf5fb --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# do not print any ProfileEvents packets +$CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' +# print only last +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print everything +test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +# print each 100 ms +test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL From fb91b1d92b8309e8a925cce25f4e1adaf967a306 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 19 Oct 2021 01:04:07 +0300 Subject: [PATCH 206/238] StorageExecutable fix small issues --- src/Storages/StorageExecutable.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 76be3353808..6a82fc88977 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include @@ -111,9 +113,16 @@ Pipe StorageExecutable::read( { auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; - if (!std::filesystem::exists(std::filesystem::path(script_path))) + + if (!pathStartsWith(script_path, user_scripts_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside {}", + "Executable file {} must be inside user scripts folder {}", + script_name, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exists inside user scripts folder {}", script_name, user_scripts_path); @@ -139,9 
+148,9 @@ Pipe StorageExecutable::read( bool result = process_pool->tryBorrowObject(process, [&config, this]() { config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout }; - auto shell_command = ShellCommand::execute(config); + auto shell_command = ShellCommand::executeDirect(config); return shell_command; - }, settings.max_command_execution_time * 1000); + }, settings.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, From e53335bc6fc061ce47a40b94d3b5a91ac042717f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 19 Oct 2021 08:19:43 +0000 Subject: [PATCH 207/238] Better way --- programs/server/Server.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 9 --------- src/Interpreters/Context.cpp | 11 ++++------- src/Interpreters/Context.h | 3 +-- src/Interpreters/InterpreterCreateQuery.cpp | 9 --------- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ 6 files changed, 8 insertions(+), 28 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2b526608715..bbd9af1e97e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -919,7 +919,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. - global_context->initializeBackgroundExecutors(); + global_context->initializeBackgroundExecutorsIfNeeded(); if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index a71d539e3c5..97e59f53f64 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -53,15 +53,6 @@ std::pair createTableFromAST( ast_create_query.attach = true; ast_create_query.database = database_name; - auto global_context = context->getGlobalContext(); - if (global_context - && global_context->getApplicationType() == Context::ApplicationType::LOCAL - && !global_context->isBackgroundExecutorsInitialized() - && ast_create_query.storage && endsWith(ast_create_query.storage->engine->name, "MergeTree")) - { - global_context->initializeBackgroundExecutors(); - } - if (ast_create_query.as_table_function) { const auto & factory = TableFunctionFactory::instance(); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1602e6a6a31..bbad7e782ed 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2895,14 +2895,11 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } -bool Context::isBackgroundExecutorsInitialized() const +void Context::initializeBackgroundExecutorsIfNeeded() { - return is_background_executors_initialized; -} - -void Context::initializeBackgroundExecutors() -{ - assert(!is_background_executors_initialized); + auto lock = getLock(); + if (is_background_executors_initialized) + return; const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 93be367e46d..b20274c2cb8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -862,8 +862,7 @@ public: void setReadTaskCallback(ReadTaskCallback && callback); /// Background executors related methods - void initializeBackgroundExecutors(); - bool 
isBackgroundExecutorsInitialized() const; + void initializeBackgroundExecutorsIfNeeded(); MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5b993bce724..6d38c55bd62 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -833,15 +833,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database.empty() ? current_database : create.database; - auto global_context = getContext()->getGlobalContext(); - if (global_context - && global_context->getApplicationType() == Context::ApplicationType::LOCAL - && !global_context->isBackgroundExecutorsInitialized() - && create.storage && endsWith(create.storage->engine->name, "MergeTree")) - { - global_context->initializeBackgroundExecutors(); - } - // If this is a stub ATTACH query, read the query definition from the database if (create.attach && !create.storage && !create.columns_list) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10fa18186ee..8b03c1e614d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -205,6 +205,8 @@ MergeTreeData::MergeTreeData( , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext()) , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext()) { + context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); + const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key; From eb0ce68f10e860bfc864bcca7d6bcdeca2072ab4 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 19 Oct 2021 11:27:48 +0300 Subject: [PATCH 208/238] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index fe3073f9ff2..dfdc71e0f0b 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -19,7 +19,7 @@ file_path=$CUR_DIR/${FILE} touch ${file_path} ln -s ${file_path} ${symlink_path} -chmod +w ${symlink_path} +chmod ugo+w ${symlink_path} function cleanup() { From 3dfbc80f0b0a4aec7649cf2678d4257a19b10b1a Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 12:38:11 +0300 Subject: [PATCH 209/238] Add cases to test replaceRegexpAll_bug --- .../02100_replaceRegexpAll_bug.reference | 14 +++++++++++--- .../0_stateless/02100_replaceRegexpAll_bug.sql | 17 ++++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference index 2bb40778ca6..993dd9b1cde 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -1,3 +1,11 @@ -aaaabb -b aaaa -aaaa +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql index 
d0caeacfa0e..32f7f63f6d0 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -1,3 +1,14 @@ -SELECT trim(leading 'b ' FROM 'b aaaabb ') x; -SELECT trim(trailing 'b ' FROM 'b aaaabb ') x; -SELECT trim(both 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaabb ' == trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT 'b aaaa' == trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaa' == trim(both 'b ' FROM 'b aaaabb ') x; + +SELECT '1' == replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; + +SELECT '1,,' == replaceRegexpOne(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpOne(',,1', '^[,]*|[,]*$', '') x; +SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; + +SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; +SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; From 36bb4033ba9a0f8dc49a6ae1f604167e284e4d67 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 12:44:55 +0300 Subject: [PATCH 210/238] Whitespace change in kerberized_hadoop/Dockerfile --- docker/test/integration/kerberized_hadoop/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 11da590f901..4a2a8866b8d 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,6 +1,7 @@ # docker build -t clickhouse/kerberized-hadoop . FROM sequenceiq/hadoop-docker:2.7.0 + RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo From a92dc0a8260cc2436f098ce31cae6c5b0bdc5e03 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Oct 2021 12:58:10 +0300 Subject: [PATCH 211/238] Update obsolete comments. --- src/Formats/FormatFactory.h | 2 +- src/Formats/NativeReader.cpp | 1 - src/IO/Progress.h | 2 +- src/Interpreters/Aggregator.h | 2 -- src/Interpreters/Context.h | 4 ++-- src/Processors/Formats/IOutputFormat.h | 3 ++- src/Processors/Sources/SourceWithProgress.cpp | 9 +++------ src/QueryPipeline/ProfileInfo.h | 2 +- src/QueryPipeline/QueryPipelineBuilder.h | 1 - 9 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d5784219c6a..ee3824081bb 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -42,7 +42,7 @@ FormatSettings getFormatSettings(ContextPtr context); template FormatSettings getFormatSettings(ContextPtr context, const T & settings); -/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. +/** Allows to create an IInputFormat or IOutputFormat by the name of the format. * Note: format and compression are independent things. 
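+ * A typical lookup, as a sketch (simplified signature, assuming a prepared
+ * WriteBuffer `out_buf` and a header Block):
+ *
+ *     auto out = FormatFactory::instance().getOutputFormat("CSV", out_buf, header, context);
+ *     out->write(block);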
*/ class FormatFactory final : private boost::noncopyable diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 2d8fdc160f5..9ef248dc904 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -56,7 +56,6 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, } } -// also resets few vars from IBlockInputStream (I didn't want to propagate resetParser upthere) void NativeReader::resetParser() { istr_concrete = nullptr; diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 7118de844f2..c00eea98ff4 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -121,7 +121,7 @@ struct Progress /** Callback to track the progress of the query. - * Used in IBlockInputStream and Context. + * Used in QueryPipeline and Context. * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. */ diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6d6bf61834b..3c53769e128 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -43,8 +43,6 @@ namespace ErrorCodes extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; } -class IBlockOutputStream; - /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. * Data and pool ownership (states of aggregate functions) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 5a28e3fac97..c6bb266120a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -636,13 +636,13 @@ public: const Settings & getSettingsRef() const { return settings; } void setProgressCallback(ProgressCallback callback); - /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. + /// Used in executeQuery() to pass it to the QueryPipeline. ProgressCallback getProgressCallback() const; void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; } FileProgressCallback getFileProgressCallback() const { return file_progress_callback; } - /** Set in executeQuery and InterpreterSelectQuery. Then it is used in IBlockInputStream, + /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ void setProcessListElement(QueryStatus * elem); diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index b647338d6fb..ba4dcee6f70 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -72,7 +72,8 @@ public: InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); } - /// Compatible to IBlockOutputStream interface + /// Compatibility with old interface. + /// TODO: separate formats and processors. void write(const Block & block); diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 15d64dee3ee..0ebdd968997 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -69,8 +69,7 @@ void SourceWithProgress::work() } } -/// Aggregated copy-paste from IBlockInputStream::progressImpl. -/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. +/// TODO: Most of this must be done in PipelineExecutor outside. 
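+/// A sketch of where that could land (hypothetical executor method, simplified
+/// signatures; the calls mirror the ones kept below):
+///
+///     void PipelineExecutor::onProgress(const Progress & value)
+///     {
+///         progress.incrementPiecewiseAtomically(value);
+///         CurrentThread::updatePerformanceCounters();
+///         limits.speed_limits.throttle(
+///             progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+///     }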
void SourceWithProgress::progress(const Progress & value) { was_progress_called = true; @@ -135,14 +134,12 @@ void SourceWithProgress::progress(const Progress & value) if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) { - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. CurrentThread::updatePerformanceCounters(); last_profile_events_update_time = total_elapsed_microseconds; } - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) diff --git a/src/QueryPipeline/ProfileInfo.h b/src/QueryPipeline/ProfileInfo.h index 335092ce244..0a5800cd409 100644 --- a/src/QueryPipeline/ProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -12,7 +12,7 @@ class Block; class ReadBuffer; class WriteBuffer; -/// Information for profiling. See IBlockInputStream.h +/// Information for profiling. See SourceWithProgress.h struct ProfileInfo { bool started = false; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index d2bbea03ce5..12f74805173 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -129,7 +129,6 @@ public: void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } void setQuota(const std::shared_ptr & quota) { pipe.setQuota(quota); } - /// For compatibility with IBlockInputStream. void setProgressCallback(const ProgressCallback & callback); void setProcessListElement(QueryStatus * elem); From 4fbd332bf1bc0db29dee09699c4c737bfd2e64b0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 11:53:55 +0000 Subject: [PATCH 212/238] Done --- .../sql-reference/statements/select/from.md | 2 +- .../sql-reference/statements/select/from.md | 2 +- .../sql-reference/statements/select/from.md | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 20 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 6 +----- .../queries/0_stateless/01236_graphite_mt.sql | 4 +--- 6 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 7c5ea732122..df30a0fb0d2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family (except `GraphiteMergeTree`). Also supported for: +It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. 
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index b62b2fd51d4..0711d602cd1 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM Если в запросе используется модификатор `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах. -Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков (кроме `GraphiteMergeTree`). Также поддерживается для: +Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков. Также поддерживается для: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты исполнения `MergeTree` движков. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), и [MaterializedView](../../../engines/table-engines/special/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`. diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index fae25c0c3c1..c47e74e5e1f 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM 当 `FINAL` 被指定,ClickHouse会在返回结果之前完全合并数据,从而执行给定表引擎合并期间发生的所有数据转换。 -它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族(除了 `GraphiteMergeTree`). 还支持: +它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族. 
还支持: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 57785a5cc2d..3b1d7254e2c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -506,38 +507,39 @@ static void addMergingFinal( const auto & header = pipe.getHeader(); size_t num_outputs = pipe.numOutputPorts(); + auto now = time(nullptr); + auto get_merging_processor = [&]() -> MergingTransformPtr { switch (merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - { return std::make_shared(header, num_outputs, - sort_description, max_block_size); - } + sort_description, max_block_size); case MergeTreeData::MergingParams::Collapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, true, max_block_size); + sort_description, merging_params.sign_column, true, max_block_size); case MergeTreeData::MergingParams::Summing: return std::make_shared(header, num_outputs, - sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); + sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); case MergeTreeData::MergingParams::Aggregating: return std::make_shared(header, num_outputs, - sort_description, max_block_size); + sort_description, max_block_size); case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, num_outputs, - sort_description, merging_params.version_column, max_block_size); + sort_description, merging_params.version_column, max_block_size); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, max_block_size); + sort_description, merging_params.sign_column, max_block_size); case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + return std::make_shared(header, num_outputs, + sort_description, max_block_size, merging_params.graphite_params, now); } __builtin_unreachable(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c4536c463d5..d20d0024222 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -402,11 +402,7 @@ public: bool supportsFinal() const override { - return merging_params.mode == MergingParams::Collapsing - || merging_params.mode == MergingParams::Summing - || merging_params.mode == MergingParams::Aggregating - || merging_params.mode == MergingParams::Replacing - || merging_params.mode == MergingParams::VersionedCollapsing; + return merging_params.mode != MergingParams::Ordinary; } bool supportsSubcolumns() const override { return true; } diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index a6dd4b8b6fb..3697a1d01d8 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -32,8 +32,6 @@ WITH dates 
AS select 1, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200) union all select 2, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200); -optimize table test_graphite final; - -select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc; +select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc final; drop table test_graphite; From 1114d06bc0191bc2b204cfeae3aa23ac6673c610 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 12:11:18 +0000 Subject: [PATCH 213/238] Done --- src/Functions/readWkt.cpp | 32 ++++++------- src/Functions/registerFunctionsGeo.cpp | 4 +- src/Functions/svg.cpp | 1 + tests/fuzz/all.dict | 9 ++-- tests/fuzz/dictionaries/functions.dict | 9 ++-- tests/queries/0_stateless/01300_read_wkt.sql | 14 +++--- tests/queries/0_stateless/01300_svg.sql | 48 ++++++++++---------- 7 files changed, 60 insertions(+), 57 deletions(-) diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 14e12fb310c..c3ae6516e0f 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -18,10 +18,10 @@ namespace ErrorCodes template -class FunctionReadWkt : public IFunction +class FunctionReadWKT : public IFunction { public: - explicit FunctionReadWkt() = default; + explicit FunctionReadWKT() = default; static constexpr const char * name = NameHolder::name; @@ -72,36 +72,36 @@ public: static FunctionPtr create(ContextPtr) { - return std::make_shared>(); + return std::make_shared>(); } }; -struct ReadWktPointNameHolder +struct ReadWKTPointNameHolder { - static constexpr const char * name = "readWktPoint"; + static constexpr const char * name = "readWKTPoint"; }; -struct ReadWktRingNameHolder +struct ReadWKTRingNameHolder { - static constexpr const char * name = "readWktRing"; + static constexpr const char * name = "readWKTRing"; }; -struct ReadWktPolygonNameHolder +struct ReadWKTPolygonNameHolder { - static constexpr const char * name = "readWktPolygon"; + static constexpr const char * name = "readWKTPolygon"; }; -struct ReadWktMultiPolygonNameHolder +struct ReadWKTMultiPolygonNameHolder { - static constexpr const char * name = "readWktMultiPolygon"; + static constexpr const char * name = "readWKTMultiPolygon"; }; -void registerFunctionReadWkt(FunctionFactory & factory) +void registerFunctionReadWKT(FunctionFactory & factory) { - factory.registerFunction, ReadWktPointNameHolder>>(); - factory.registerFunction, ReadWktRingNameHolder>>(); - factory.registerFunction, ReadWktPolygonNameHolder>>(); - factory.registerFunction, ReadWktMultiPolygonNameHolder>>(); + factory.registerFunction, ReadWKTPointNameHolder>>(); + factory.registerFunction, ReadWKTRingNameHolder>>(); + factory.registerFunction, ReadWKTPolygonNameHolder>>(); + factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); } } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index a0ae38f6b85..fd55c9cc20a 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -23,7 +23,7 @@ void registerFunctionGeohashEncode(FunctionFactory & factory); void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeohashesInBox(FunctionFactory & factory); void registerFunctionWkt(FunctionFactory & factory); -void registerFunctionReadWkt(FunctionFactory & factory); +void registerFunctionReadWKT(FunctionFactory & factory); void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 
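The rename breaks queries that use the old readWkt* spellings. If backward
compatibility were wanted, the same alias mechanism this patch uses for SVG
(factory.registerAlias("SVG", "svg") in svg.cpp below) could cover them; a
hypothetical sketch, not part of this change:

    /// Hypothetical compatibility aliases for the pre-rename spellings:
    factory.registerAlias("readWktPoint", "readWKTPoint");
    factory.registerAlias("readWktRing", "readWKTRing");
    factory.registerAlias("readWktPolygon", "readWKTPolygon");
    factory.registerAlias("readWktMultiPolygon", "readWKTMultiPolygon");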
@@ -79,7 +79,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionGeohashDecode(factory); registerFunctionGeohashesInBox(factory); registerFunctionWkt(factory); - registerFunctionReadWkt(factory); + registerFunctionReadWKT(factory); registerFunctionSvg(factory); #if USE_H3 diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index 4495e668add..b3a89c0393c 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -102,6 +102,7 @@ public: void registerFunctionSvg(FunctionFactory & factory) { factory.registerFunction(); + factory.registerAlias("SVG", "svg"); } } diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 4a9afc348cf..bf25f1fa484 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -985,10 +985,10 @@ "RANGE" "rank" "rankCorr" -"readWktMultiPolygon" -"readWktPoint" -"readWktPolygon" -"readWktRing" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" "REAL" "REFRESH" "regexpQuoteMeta" @@ -1177,6 +1177,7 @@ "sumWithOverflow" "SUSPEND" "svg" +"SVG" "SYNC" "synonyms" "SYNTAX" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index fb35375f284..722e931dc09 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -52,6 +52,7 @@ "h3GetResolution" "h3EdgeLengthM" "svg" +"SVG" "equals" "geohashesInBox" "polygonsIntersectionCartesian" @@ -114,7 +115,7 @@ "replaceOne" "emptyArrayInt32" "extract" -"readWktPolygon" +"readWKTPolygon" "notILike" "geohashDecode" "toModifiedJulianDay" @@ -164,7 +165,7 @@ "lessOrEquals" "subtractQuarters" "ngramSearch" -"readWktRing" +"readWKTRing" "trimRight" "endsWith" "ngramDistanceCaseInsensitive" @@ -713,13 +714,13 @@ "s2RectContains" "toDate" "regexpQuoteMeta" -"readWktMultiPolygon" +"readWKTMultiPolygon" "emptyArrayString" "bitmapOr" "cutWWW" "emptyArrayInt8" "less" -"readWktPoint" +"readWKTPoint" "reinterpretAsDateTime" "notEquals" "geoToS2" diff --git a/tests/queries/0_stateless/01300_read_wkt.sql b/tests/queries/0_stateless/01300_read_wkt.sql index 8121bdf6084..1995c5153d7 100644 --- a/tests/queries/0_stateless/01300_read_wkt.sql +++ b/tests/queries/0_stateless/01300_read_wkt.sql @@ -1,14 +1,14 @@ -SELECT readWktPoint('POINT(0 0)'); -SELECT readWktPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); -SELECT readWktPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); -SELECT readWktMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); +SELECT readWKTPoint('POINT(0 0)'); +SELECT readWKTPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); +SELECT readWKTPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); +SELECT readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('POINT(0 0)', 1); INSERT INTO geo VALUES ('POINT(1 0)', 2); INSERT INTO geo VALUES ('POINT(2 0)', 3); -SELECT readWktPoint(s) FROM geo ORDER BY id; +SELECT readWKTPoint(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); @@ -18,13 +18,13 @@ INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0))', 3); INSERT INTO geo VALUES ('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))', 4); INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4))', 5); INSERT INTO geo VALUES ('POLYGON((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 
5,4 4))', 6); -SELECT readWktPolygon(s) FROM geo ORDER BY id; +SELECT readWKTPolygon(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('MULTIPOLYGON(((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 1); INSERT INTO geo VALUES ('MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 2); INSERT INTO geo VALUES ('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 3); -SELECT readWktMultiPolygon(s) FROM geo ORDER BY id; +SELECT readWKTMultiPolygon(s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01300_svg.sql b/tests/queries/0_stateless/01300_svg.sql index a1deb1745c3..cf794f2190b 100644 --- a/tests/queries/0_stateless/01300_svg.sql +++ b/tests/queries/0_stateless/01300_svg.sql @@ -1,50 +1,50 @@ -SELECT svg((0., 0.)); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)]); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); -SELECT svg((0., 0.), 'b'); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); +SELECT SVG((0., 0.)); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)]); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); +SELECT SVG((0., 0.), 'b'); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Tuple(Float64, Float64), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ((0., 0.), 'b', 1); INSERT INTO geo VALUES ((1., 0.), 'c', 2); INSERT INTO geo VALUES ((2., 0.), 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg((0., 0.), s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG((0., 0.), s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Tuple(Float64, Float64)), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b', 1); INSERT INTO geo VALUES ([(1., 0.), (10, 0), (10, 10), (0, 10)], 'c', 2); INSERT INTO geo VALUES ([(2., 0.), (10, 0), (10, 10), (0, 10)], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Tuple(Float64, Float64))), s 
String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'b', 1); INSERT INTO geo VALUES ([[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'c', 2); INSERT INTO geo VALUES ([[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Array(Tuple(Float64, Float64)))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b', 1); INSERT INTO geo VALUES ([[[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'c', 2); INSERT INTO geo VALUES ([[[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE geo; From 75487be8998d02b22a800e27463a7b942053a80d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 12:16:33 +0000 Subject: [PATCH 214/238] White list of storages that supports final --- src/Storages/MergeTree/MergeTreeData.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d20d0024222..2ea6a89002c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -402,7 +402,12 @@ public: bool supportsFinal() const override { - return merging_params.mode != MergingParams::Ordinary; + return merging_params.mode == MergingParams::Collapsing + || merging_params.mode == MergingParams::Summing + || merging_params.mode == MergingParams::Aggregating + || merging_params.mode == MergingParams::Replacing + || merging_params.mode == MergingParams::Graphite + || merging_params.mode == MergingParams::VersionedCollapsing; } bool supportsSubcolumns() const override { return true; } From 08f3a01830867cac67703cd680a1c7280a44a079 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 16:26:16 +0300 Subject: [PATCH 215/238] Update 01236_graphite_mt.sql --- tests/queries/0_stateless/01236_graphite_mt.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index 3697a1d01d8..1d531f88ecb 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ 
b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -32,6 +32,6 @@ WITH dates AS select 1, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200) union all select 2, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200); -select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc final; +select key, Path, Value, Version, col from test_graphite final order by key, Path, Time desc; drop table test_graphite; From 1d78f1c63b525812699fc16586f7fb7409162bc0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 16:40:06 +0300 Subject: [PATCH 216/238] Fix ca-bundle.crt in clickhouse/kerberized-hadoop --- docker/test/integration/kerberized_hadoop/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 4a2a8866b8d..7bc0a99f9e9 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -5,6 +5,10 @@ FROM sequenceiq/hadoop-docker:2.7.0 RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 +RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt + RUN yum clean all && \ rpm --rebuilddb && \ yum -y update && \ From acf416900dbc3181859c674d13d46849e848d1bf Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 16:45:03 +0300 Subject: [PATCH 217/238] Minor fix in clickhouse/kerberized-hadoop Dockerfile --- docker/test/integration/kerberized_hadoop/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 7bc0a99f9e9..00944cbfc00 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -2,9 +2,9 @@ FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo +RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo # https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt @@ -14,8 +14,9 @@ RUN yum clean all && \ yum -y update && \ yum -y install yum-plugin-ovl && \ yum --quiet -y install krb5-workstation.x86_64 + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ + curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ 
./configure && \ From 0f2e23d775f39d49c32b2a0d03f61b46b468fe35 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 19 Oct 2021 17:41:59 +0300 Subject: [PATCH 218/238] Update StorageExecutable.cpp --- src/Storages/StorageExecutable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 6a82fc88977..16647d0b60f 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -122,7 +122,7 @@ Pipe StorageExecutable::read( if (!std::filesystem::exists(std::filesystem::path(script_path))) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside user scripts folder {}", + "Executable file {} does not exist inside user scripts folder {}", script_name, user_scripts_path); From aa8bc93be8c2e245ebf12c7b5872910a2f071722 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 19 Oct 2021 23:35:59 +0800 Subject: [PATCH 219/238] Fix build --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a61c2669ef2..106147d95fc 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1895,7 +1895,7 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); } -void Context::initializeKeeperDispatcher(bool start_async) const +void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); From 05d93796dcf5ee30a9daebcd840e2b35fbf32fb2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 19 Oct 2021 23:36:07 +0800 Subject: [PATCH 220/238] Fix minmax_count projection with primary key in partition expr --- src/Storages/MergeTree/MergeTreeData.cpp | 20 +++++++++---------- src/Storages/ProjectionsDescription.cpp | 20 ++++++++++++------- src/Storages/ProjectionsDescription.h | 4 ---- .../01710_minmax_count_projection.reference | 1 + .../01710_minmax_count_projection.sql | 6 ++++++ 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10fa18186ee..ced385a18c4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4470,16 +4470,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( } size_t pos = 0; - if (!primary_key_max_column_name.empty()) - { - const auto & primary_key_column = *part->index[0]; - auto primary_key_column_size = primary_key_column.size(); - auto & min_column = assert_cast(*minmax_count_columns[pos++]); - auto & max_column = assert_cast(*minmax_count_columns[pos++]); - insert(min_column, primary_key_column[0]); - insert(max_column, primary_key_column[primary_key_column_size - 1]); - } - size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -4490,6 +4480,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( insert(max_column, range.right); } + if (!primary_key_max_column_name.empty()) + { + const auto & primary_key_column = *part->index[0]; + auto primary_key_column_size = primary_key_column.size(); + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); + insert(min_column, primary_key_column[0]); + insert(max_column, primary_key_column[primary_key_column_size - 1]); + } 
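+
+        /// Note: this primary-key pair is intentionally appended after all the
+        /// partition minmax pairs, matching the rebuilt projection header order
+        /// min(p1), max(p1), ..., min(pn), max(pn), min(k1), max(k1), count().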
+ { auto & column = assert_cast(*minmax_count_columns.back()); auto func = column.getAggregateFunction(); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index b204c288000..e5117a306ee 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -184,16 +184,16 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); - if (!primary_key_asts.empty()) - { - select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); - select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); - } for (const auto & column : minmax_columns) { select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); } + if (!primary_key_asts.empty()) + { + select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); + select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); + } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); @@ -207,8 +207,14 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); - if (!primary_key_asts.empty()) - result.primary_key_max_column_name = result.sample_block.getNames()[ProjectionDescription::PRIMARY_KEY_MAX_COLUMN_POS]; + /// If we have primary key and it's not in minmax_columns, it will be used as one additional minmax columns. + if (!primary_key_asts.empty() && result.sample_block.columns() == 2 * (minmax_columns.size() + 1) + 1) + { + /// min(p1), max(p1), min(p2), max(p2), ..., min(k1), max(k1), count() + /// ^ + /// size - 2 + result.primary_key_max_column_name = *(result.sample_block.getNames().cend() - 2); + } result.type = ProjectionDescription::Type::Aggregate; StorageInMemoryMetadata metadata; metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList())); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 4dd717239ad..7c254182ba4 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -30,10 +30,6 @@ struct ProjectionDescription static constexpr const char * MINMAX_COUNT_PROJECTION_NAME = "_minmax_count_projection"; - /// If minmax_count projection contains a primary key's minmax values. Their positions will be 0 and 1. 
- static constexpr const size_t PRIMARY_KEY_MIN_COLUMN_POS = 0; - static constexpr const size_t PRIMARY_KEY_MAX_COLUMN_POS = 1; - /// Definition AST of projection ASTPtr definition_ast; diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 5591d5a9954..da7d2fbe2bd 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -8,3 +8,4 @@ 0 0 9999 0 9999 +3 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 112487b219e..b7077de1fe6 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -43,3 +43,9 @@ select min(j), max(j) from has_final_mark; set max_rows_to_read = 5001; -- one normal part 5000 + one minmax_count_projection part 1 select min(j), max(j) from mixed_final_mark; + +-- The first primary expr is the same of some partition column +drop table if exists t; +create table t (server_date Date, something String) engine MergeTree partition by (toYYYYMM(server_date), server_date) order by (server_date, something); +insert into t values ('2019-01-01', 'test1'), ('2019-02-01', 'test2'), ('2019-03-01', 'test3'); +select count() from t; From 9699a71806c63f6fd47099bde51031bb12f403c9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Oct 2021 22:39:55 +0300 Subject: [PATCH 221/238] Update amis --- tests/ci/metrics_lambda/app.py | 2 +- tests/ci/termination_lambda/app.py | 2 +- tests/ci/token_lambda/app.py | 2 +- tests/ci/worker/ubuntu_ami.sh | 47 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 tests/ci/worker/ubuntu_ami.sh diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index 6c6fc594847..d2fb048638b 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -10,7 +10,7 @@ from collections import namedtuple def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 7fd7c400db9..0b39cf73f25 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -10,7 +10,7 @@ from collections import namedtuple def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/token_lambda/app.py b/tests/ci/token_lambda/app.py index 4edd3e8d08c..731d6c040de 100644 --- a/tests/ci/token_lambda/app.py +++ b/tests/ci/token_lambda/app.py @@ -39,7 +39,7 @@ def get_runner_registration_token(access_token): def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/worker/ubuntu_ami.sh b/tests/ci/worker/ubuntu_ami.sh new file mode 100644 index 00000000000..2609c1a69f3 --- /dev/null +++ b/tests/ci/worker/ubuntu_ami.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo 
"Running prepare script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip \ + unzip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +./bin/installdependencies.sh + +chown -R ubuntu:ubuntu $RUNNER_HOME + +cd /home/ubuntu +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install + +rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws From 36635736e6aaaf60c1faf7e1632384e06a278313 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 12:30:17 +0300 Subject: [PATCH 222/238] Also run on master --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 49760995dfc..baa40e99418 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,5 +1,8 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy + push: + branches: + - master pull_request: types: - labeled From 83787e26f2f1a287c12886ad316733dcbc3676cf Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 12:43:03 +0300 Subject: [PATCH 223/238] SQLUserDefinedFunctions support lambdas --- .../InterpreterCreateFunctionQuery.cpp | 40 +++---------------- .../InterpreterCreateFunctionQuery.h | 1 - .../UserDefinedSQLFunctionVisitor.cpp | 10 ++++- ...l_user_defined_functions_aliases.reference | 1 + ...098_sql_user_defined_functions_aliases.sql | 4 ++ ...ql_user_defined_functions_lambda.reference | 1 + ...2099_sql_user_defined_functions_lambda.sql | 4 ++ 7 files changed, 24 insertions(+), 37 deletions(-) create mode 100644 tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference create mode 100644 tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql create mode 100644 tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference create mode 100644 tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ccb5f4040ec..9d92466c440 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,14 +1,17 @@ +#include + +#include + #include #include #include #include #include #include -#include #include #include #include -#include + namespace DB { @@ -66,42 +69,9 @@ void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const Str } ASTPtr function_body = function->as()->children.at(0)->children.at(1); - 
std::unordered_set identifiers_in_body = getIdentifiers(function_body); - - for (const auto & identifier : identifiers_in_body) - { - if (!arguments.contains(identifier)) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier {} does not exist in arguments", backQuote(identifier)); - } - validateFunctionRecursiveness(function_body, name); } -std::unordered_set InterpreterCreateFunctionQuery::getIdentifiers(ASTPtr node) -{ - std::unordered_set identifiers; - - std::stack ast_nodes_to_process; - ast_nodes_to_process.push(node); - - while (!ast_nodes_to_process.empty()) - { - auto ast_node_to_process = ast_nodes_to_process.top(); - ast_nodes_to_process.pop(); - - for (const auto & child : ast_node_to_process->children) - { - auto identifier_name_opt = tryGetIdentifierName(child); - if (identifier_name_opt) - identifiers.insert(identifier_name_opt.value()); - - ast_nodes_to_process.push(child); - } - } - - return identifiers; -} - void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) { for (const auto & child : node->children) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h index fdc03b379db..a67fdb9605d 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.h +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -22,7 +22,6 @@ public: private: static void validateFunction(ASTPtr function, const String & name); - static std::unordered_set getIdentifiers(ASTPtr node); static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); ASTPtr query_ptr; diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index cc5db020387..8df6932b153 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -25,6 +25,7 @@ void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) return; auto result = tryToReplaceFunction(*function); + if (result) ast = result; } @@ -83,9 +84,16 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f if (identifier_name_opt) { auto function_argument_it = identifier_name_to_function_argument.find(*identifier_name_opt); - assert(function_argument_it != identifier_name_to_function_argument.end()); + if (function_argument_it == identifier_name_to_function_argument.end()) + continue; + + auto child_alias = child->tryGetAlias(); child = function_argument_it->second->clone(); + + if (!child_alias.empty()) + child->setAlias(child_alias); + continue; } diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference new file mode 100644 index 00000000000..8ab2f6d0ac6 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference @@ -0,0 +1 @@ +8 4 diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql new file mode 100644 index 00000000000..fef2daf8fd1 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); +SELECT alias_function(2); +DROP FUNCTION alias_function; diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference 
b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference new file mode 100644 index 00000000000..8f6cd5ccd03 --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference @@ -0,0 +1 @@ +[2,4,6] diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql new file mode 100644 index 00000000000..e66651c1e5a --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); +SELECT lambda_function([1,2,3]); +DROP FUNCTION lambda_function; From 75d77339e9b05f15a4de59cf76086ec59838149e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 12:45:02 +0300 Subject: [PATCH 224/238] Remove master --- .github/workflows/main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index baa40e99418..49760995dfc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,8 +1,5 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy - push: - branches: - - master pull_request: types: - labeled From 3496cd1bfec9ac4bce9119651fbb14f58e1824d3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 12:53:03 +0300 Subject: [PATCH 225/238] ExecutableUDF example --- src/Common/examples/CMakeLists.txt | 3 ++ src/Common/examples/executable_udf.cpp | 44 ++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 src/Common/examples/executable_udf.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index e72681621cb..020f3cc4446 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -77,3 +77,6 @@ target_link_libraries (average PRIVATE clickhouse_common_io) add_executable (shell_command_inout shell_command_inout.cpp) target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) + +add_executable (executable_udf executable_udf.cpp) +target_link_libraries (executable_udf PRIVATE dbms) diff --git a/src/Common/examples/executable_udf.cpp b/src/Common/examples/executable_udf.cpp new file mode 100644 index 00000000000..78a248fcddf --- /dev/null +++ b/src/Common/examples/executable_udf.cpp @@ -0,0 +1,44 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace DB; + +int main(int argc, char **argv) +{ + (void)(argc); + (void)(argv); + + std::string buffer; + + ReadBufferFromFileDescriptor read_buffer(0); + WriteBufferFromFileDescriptor write_buffer(1); + size_t rows = 0; + char dummy; + + while (!read_buffer.eof()) { + readIntText(rows, read_buffer); + readChar(dummy, read_buffer); + + for (size_t i = 0; i < rows; ++i) { + readString(buffer, read_buffer); + readChar(dummy, read_buffer); + + writeString("Key ", write_buffer); + writeString(buffer, write_buffer); + writeChar('\n', write_buffer); + } + + write_buffer.next(); + } + + return 0; +} From ad409d9b47ae1dd3c492a46e32d0f66701dd122c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:05:57 +0300 Subject: [PATCH 226/238] SQLUserDefinedFunctions added DROP IF EXISTS support --- src/Interpreters/InterpreterDropFunctionQuery.cpp | 5 +++++ src/Interpreters/UserDefinedSQLFunctionFactory.cpp | 5 +++++ src/Interpreters/UserDefinedSQLFunctionFactory.h | 2 ++ src/Parsers/ASTDropFunctionQuery.cpp 
| 2 ++ src/Parsers/ASTDropFunctionQuery.h | 2 ++ src/Parsers/ParserDropFunctionQuery.cpp | 7 +++++++ ...1_sql_user_defined_functions_drop_if_exists.reference | 1 + .../02101_sql_user_defined_functions_drop_if_exists.sql | 9 +++++++++ 8 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 53cb96b42fe..b788c8f960f 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -18,6 +18,11 @@ BlockIO InterpreterDropFunctionQuery::execute() FunctionNameNormalizer().visit(query_ptr.get()); auto & drop_function_query = query_ptr->as(); + auto & user_defined_functions_factory = UserDefinedSQLFunctionFactory::instance(); + + if (drop_function_query.if_exists && !user_defined_functions_factory.has(drop_function_query.function_name)) + return {}; + UserDefinedSQLFunctionFactory::instance().unregisterFunction(drop_function_query.function_name); UserDefinedSQLObjectsLoader::instance().removeObject(current_context, UserDefinedSQLObjectType::Function, drop_function_query.function_name); diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp index 434f5523b42..1d2a80305c6 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp @@ -77,6 +77,11 @@ ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) return it->second; } +bool UserDefinedSQLFunctionFactory::has(const String & function_name) const +{ + return tryGet(function_name) != nullptr; +} + std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const { std::vector registered_names; diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h index 366e27e833d..6838c2f9892 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.h @@ -23,6 +23,8 @@ public: ASTPtr tryGet(const String & function_name) const; + bool has(const String & function_name) const; + std::vector getAllRegisteredNames() const override; private: diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp index 5800a7ba9cb..0a46940e73d 100644 --- a/src/Parsers/ASTDropFunctionQuery.cpp +++ b/src/Parsers/ASTDropFunctionQuery.cpp @@ -14,6 +14,8 @@ void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAS { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? 
hilite_none : ""); + if (if_exists) + settings.ostr << "IF EXISTS"; } } diff --git a/src/Parsers/ASTDropFunctionQuery.h b/src/Parsers/ASTDropFunctionQuery.h index e32bf93a64d..a9d70a3016f 100644 --- a/src/Parsers/ASTDropFunctionQuery.h +++ b/src/Parsers/ASTDropFunctionQuery.h @@ -10,6 +10,8 @@ class ASTDropFunctionQuery : public IAST public: String function_name; + bool if_exists = false; + String getID(char) const override { return "DropFunctionQuery"; } ASTPtr clone() const override; diff --git a/src/Parsers/ParserDropFunctionQuery.cpp b/src/Parsers/ParserDropFunctionQuery.cpp index 04d26109836..d8c86646410 100644 --- a/src/Parsers/ParserDropFunctionQuery.cpp +++ b/src/Parsers/ParserDropFunctionQuery.cpp @@ -11,7 +11,10 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec { ParserKeyword s_drop("DROP"); ParserKeyword s_function("FUNCTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserIdentifier function_name_p; + bool if_exists = false; ASTPtr function_name; @@ -21,10 +24,14 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec if (!s_function.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!function_name_p.parse(pos, function_name, expected)) return false; auto drop_function_query = std::make_shared(); + drop_function_query->if_exists = if_exists; node = drop_function_query; drop_function_query->function_name = function_name->as().name(); diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql new file mode 100644 index 00000000000..09e2677774c --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +CREATE FUNCTION 02101_test_function AS x -> x + 1; + +SELECT 02101_test_function(1); + +DROP FUNCTION 02101_test_function; +DROP FUNCTION 02101_test_function; --{serverError 46} +DROP FUNCTION IF EXISTS 02101_test_function; From fe93533ba45727225ff2e00cfcb87e3bc753d813 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:09:33 +0300 Subject: [PATCH 227/238] Fixed tests --- .../02098_sql_user_defined_functions_aliases.sql | 6 +++--- .../0_stateless/02099_sql_user_defined_functions_lambda.sql | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql index fef2daf8fd1..c5bd2b5b5f2 100644 --- a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql @@ -1,4 +1,4 @@ -- Tags: no-parallel -CREATE FUNCTION alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); -SELECT alias_function(2); -DROP FUNCTION alias_function; +CREATE FUNCTION 02098_alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); +SELECT 02098_alias_function(2); +DROP FUNCTION 02098_alias_function; diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql 
b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql index e66651c1e5a..1c926faf3a1 100644 --- a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql @@ -1,4 +1,4 @@ -- Tags: no-parallel -CREATE FUNCTION lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); -SELECT lambda_function([1,2,3]); -DROP FUNCTION lambda_function; +CREATE FUNCTION 02099_lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); +SELECT 02099_lambda_function([1,2,3]); +DROP FUNCTION 02099_lambda_function; From dc964080126b5446ff9dec209ce9f8a7fa2a648e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:09:48 +0300 Subject: [PATCH 228/238] Fixed build --- src/Interpreters/InterpreterCreateFunctionQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 9d92466c440..c3d02fa4f34 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { - extern const int UNKNOWN_IDENTIFIER; extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; extern const int UNSUPPORTED_METHOD; } From ba442b7ce5e04775801f2e7118eb05111e6cc200 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 13:31:48 +0300 Subject: [PATCH 229/238] Use robot token in actions for statuses --- .github/workflows/main.yml | 20 ++++++++++++++++---- tests/ci/docker_images_check.py | 3 ++- tests/ci/finish_check.py | 3 ++- tests/ci/get_robot_token.py | 14 ++++++++++++++ tests/ci/pvs_check.py | 3 ++- tests/ci/run_check.py | 3 ++- tests/ci/style_check.py | 3 ++- 7 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 tests/ci/get_robot_token.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 49760995dfc..60be4368df7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,10 @@ jobs: - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -30,7 +33,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -52,7 +58,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ 
secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] @@ -63,4 +72,7 @@ jobs: - name: Finish label run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 141d075cc6d..d1954d70e71 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil +from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -222,7 +223,7 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 89139468fd6..db405cf8f73 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -4,6 +4,7 @@ from github import Github from pr_info import PRInfo import json import os +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -34,7 +35,7 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event, need_orgs=True) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py new file mode 100644 index 00000000000..75b688e5b44 --- /dev/null +++ b/tests/ci/get_robot_token.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +import os +from github import Github + +def get_best_robot_token(token_prefix_env_name="ROBOT_TOKEN_", total_tokens=4): + tokens = {} + for i in range(total_tokens): + token_name = token_prefix_env_name + str(i) + token = os.getenv(token_name) + gh = Github(token) + rest, _ = gh.rate_limiting + tokens[token] = rest + + return max(tokens.items(), key=lambda x: x[1])[0] diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index c254ad74ae4..34052adecdf 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,6 +9,7 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys +from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -83,7 +84,7 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 95e827671ca..e6bc7259330 100644 --- 
a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -6,6 +6,7 @@ from pr_info import PRInfo import sys import logging from github import Github +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -113,7 +114,7 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 71978379099..0b1d673e628 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,6 +10,7 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo +from get_robot_token import get_best_robot_token NAME = "Style Check (actions)" @@ -108,7 +109,7 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' From 0dcb36df0155182727726bf326eb08510752cc15 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 13:57:41 +0300 Subject: [PATCH 230/238] Remove statuses from actions --- .github/workflows/main.yml | 26 -------------------------- tests/ci/docker_images_check.py | 10 ---------- tests/ci/pvs_check.py | 10 ---------- tests/ci/run_check.py | 7 ------- tests/ci/style_check.py | 11 ----------- 5 files changed, 64 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60be4368df7..46a66ce98ce 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,11 +17,6 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -33,10 +28,6 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -58,21 +49,4 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - FinishCheck: - needs: [StyleCheck, DockerHubPush, CheckLabels] - 
runs-on: [self-hosted] - steps: - - name: Check out repository code - uses: actions/checkout@v2 - - name: Finish label - run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d1954d70e71..0ddca0718e7 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,6 @@ import os from pr_info import PRInfo from github import Github import shutil -from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -168,11 +167,6 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -223,10 +217,6 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) - gh = Github(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=url) - with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 34052adecdf..94e046c0a68 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,7 +9,6 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys -from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -40,11 +39,6 @@ def _process_txt_report(path): errors.append(':'.join(line.split('\t')[0:2])) return warnings, errors -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) @@ -84,8 +78,6 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) - images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' if os.path.exists(images_path): @@ -138,8 +130,6 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) print("::notice ::Report url: {}".format(report_url)) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=report_url) except Exception as ex: print("Got an exception", ex) sys.exit(1) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index e6bc7259330..3371e5a5720 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,8 +5,6 @@ import 
requests from pr_info import PRInfo import sys import logging -from github import Github -from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -114,13 +112,8 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: print("::notice ::Cannot run") - commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: print("::notice ::Can run") - commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0b1d673e628..c7c25d2a95b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,7 +10,6 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo -from get_robot_token import get_best_robot_token NAME = "Style Check (actions)" @@ -79,12 +78,6 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi logging.info("Search result in url %s", url) return url - -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " @@ -109,8 +102,6 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) - images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' if os.path.exists(images_path): @@ -141,5 +132,3 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 0d6712532ad00274bbcfa9b20d5832c0123db88c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:25:14 +0300 Subject: [PATCH 231/238] Revert "Remove statuses from actions" This reverts commit 0dcb36df0155182727726bf326eb08510752cc15. 
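The restored get_best_robot_token helper spreads GitHub API traffic across
several robot accounts by answering with whichever token has the most
rate-limit budget left. A minimal sketch of that selection idea, assuming
PyGithub and an in-memory list of candidate tokens (pick_freshest_token is
an illustrative name, not the helper itself):

    from github import Github

    def pick_freshest_token(candidate_tokens):
        # rate_limiting on an authenticated client is a (remaining, limit) pair.
        remaining = {}
        for token in candidate_tokens:
            rest, _limit = Github(token).rate_limiting
            remaining[token] = rest
        # Prefer the token with the most API calls still available.
        return max(remaining.items(), key=lambda kv: kv[1])[0]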
--- .github/workflows/main.yml | 26 ++++++++++++++++++++++++++ tests/ci/docker_images_check.py | 10 ++++++++++ tests/ci/pvs_check.py | 10 ++++++++++ tests/ci/run_check.py | 7 +++++++ tests/ci/style_check.py | 11 +++++++++++ 5 files changed, 64 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 46a66ce98ce..60be4368df7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,6 +17,11 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + env: + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -28,6 +33,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -49,4 +58,21 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + FinishCheck: + needs: [StyleCheck, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py + env: + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0ddca0718e7..d1954d70e71 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil +from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -167,6 +168,11 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -217,6 +223,10 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + gh = Github(get_best_robot_token()) + 
commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 94e046c0a68..34052adecdf 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,6 +9,7 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys +from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -39,6 +40,11 @@ def _process_txt_report(path): errors.append(':'.join(line.split('\t')[0:2])) return warnings, errors +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) @@ -78,6 +84,8 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(get_best_robot_token()) + images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' if os.path.exists(images_path): @@ -130,6 +138,8 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) except Exception as ex: print("Got an exception", ex) sys.exit(1) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 3371e5a5720..e6bc7259330 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,6 +5,8 @@ import requests from pr_info import PRInfo import sys import logging +from github import Github +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -112,8 +114,13 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: print("::notice ::Cannot run") + commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: print("::notice ::Can run") + commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c7c25d2a95b..0b1d673e628 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,6 +10,7 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo +from get_robot_token import get_best_robot_token NAME = "Style Check (actions)" @@ -78,6 +79,12 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi logging.info("Search result in url %s", url) return url + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + def 
update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " @@ -102,6 +109,8 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(get_best_robot_token()) + images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' if os.path.exists(images_path): @@ -132,3 +141,5 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From d120d3720af4d59013d696536a0aa6f950ae6394 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:48:27 +0300 Subject: [PATCH 232/238] Trying aws secrets --- .github/workflows/main.yml | 25 ------------------------- tests/ci/docker_images_check.py | 9 +++------ tests/ci/get_robot_token.py | 14 ++++++++++---- tests/ci/pvs_check.py | 8 +------- tests/ci/s3_helper.py | 7 ++++--- tests/ci/style_check.py | 8 +------- 6 files changed, 19 insertions(+), 52 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60be4368df7..7f20206a7b3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,11 +17,6 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -30,14 +25,6 @@ jobs: uses: actions/checkout@v2 - name: Images check run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} - DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: @@ -55,13 +42,6 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Style Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] @@ -71,8 +51,3 @@ jobs: uses: actions/checkout@v2 - name: Finish label run: cd 
$GITHUB_WORKSPACE/tests/ci && python3 finish_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d1954d70e71..9bd3f431429 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil -from get_robot_token import get_best_robot_token +from get_robot_token import get_best_robot_token, get_parameter_from_ssm NAME = "Push to Dockerhub (actions)" @@ -177,7 +177,7 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') - dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') if os.path.exists(temp_path): shutil.rmtree(temp_path) @@ -213,10 +213,7 @@ if __name__ == "__main__": if len(description) >= 140: description = description[:136] + "..." - aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 75b688e5b44..73fdcd670fd 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,12 +1,18 @@ #!/usr/bin/env python3 -import os +import boto3 from github import Github -def get_best_robot_token(token_prefix_env_name="ROBOT_TOKEN_", total_tokens=4): +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client('ssm') + return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] + +def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): + client = boto3.client('ssm') tokens = {} - for i in range(total_tokens): + for i in range(1, total_tokens + 1): token_name = token_prefix_env_name + str(i) - token = os.getenv(token_name) + token = get_parameter_from_ssm(token_name, True, client) gh = Github(token) rest, _ = gh.rate_limiting tokens[token] = rest diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 34052adecdf..f8b1b58f307 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -81,9 +81,6 @@ if __name__ == "__main__": # this check modify repository so copy it to the temp directory logging.info("Repo copy path %s", repo_path) - aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') @@ -98,10 +95,7 @@ if __name__ == "__main__": logging.info("Got docker image %s", docker_image) - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to 
S3") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') licence_key = os.getenv('PVS_STUDIO_KEY') cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index b9ae0de6e02..4054f650223 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -6,6 +6,7 @@ import boto3 from botocore.exceptions import ClientError, BotoCoreError from multiprocessing.dummy import Pool from compress_files import compress_file_fast +from get_robot_token import get_parameter_from_ssm def _md5(fname): hash_md5 = hashlib.md5() @@ -27,8 +28,8 @@ def _flatten_list(lst): class S3Helper(object): - def __init__(self, host, aws_access_key_id, aws_secret_access_key): - self.session = boto3.session.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) + def __init__(self, host): + self.session = boto3.session.Session() self.client = self.session.client('s3', endpoint_url=host) def _upload_file_to_s3(self, bucket_name, file_path, s3_path): @@ -55,7 +56,7 @@ class S3Helper(object): self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata)) - return "https://storage.yandexcloud.net/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) def upload_test_report_to_s3(self, file_path, s3_path): return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0b1d673e628..4a8cde70bc2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -106,9 +106,6 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') @@ -132,10 +129,7 @@ if __name__ == "__main__": else: raise Exception(f"Cannot pull dockerhub for image {docker_image}") - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to S3") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) From 6ea04b2ea66c50f815d102345dc08afc7d56ca85 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:52:03 +0300 Subject: [PATCH 233/238] Fix region --- tests/ci/get_robot_token.py | 4 ++-- tests/ci/s3_helper.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 73fdcd670fd..db37ee311c5 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -4,11 +4,11 @@ from github import Github def get_parameter_from_ssm(name, 
decrypt=True, client=None): if not client: - client = boto3.client('ssm') + client = boto3.client('ssm', region_name='us-east-1') return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): - client = boto3.client('ssm') + client = boto3.client('ssm', region_name='us-east-1') tokens = {} for i in range(1, total_tokens + 1): token_name = token_prefix_env_name + str(i) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 4054f650223..3c930f26634 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -29,7 +29,7 @@ def _flatten_list(lst): class S3Helper(object): def __init__(self, host): - self.session = boto3.session.Session() + self.session = boto3.session.Session(region_name='us-east-1') self.client = self.session.client('s3', endpoint_url=host) def _upload_file_to_s3(self, bucket_name, file_path, s3_path): From ac358d08a6477715448a6b0b2834e19b3430d613 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 15:33:20 +0300 Subject: [PATCH 234/238] Fix style check --- src/Common/examples/executable_udf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/executable_udf.cpp b/src/Common/examples/executable_udf.cpp index 78a248fcddf..8d2d9f7314e 100644 --- a/src/Common/examples/executable_udf.cpp +++ b/src/Common/examples/executable_udf.cpp @@ -24,11 +24,13 @@ int main(int argc, char **argv) size_t rows = 0; char dummy; - while (!read_buffer.eof()) { + while (!read_buffer.eof()) + { readIntText(rows, read_buffer); readChar(dummy, read_buffer); - for (size_t i = 0; i < rows; ++i) { + for (size_t i = 0; i < rows; ++i) + { readString(buffer, read_buffer); readChar(dummy, read_buffer); From 54d37204265a523a6aa26fbfd6506186d45333da Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 16:08:13 +0300 Subject: [PATCH 235/238] ASTDropFunctionQuery formatting fix --- src/Parsers/ASTDropFunctionQuery.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp index 0a46940e73d..47665aa52f9 100644 --- a/src/Parsers/ASTDropFunctionQuery.cpp +++ b/src/Parsers/ASTDropFunctionQuery.cpp @@ -12,10 +12,13 @@ ASTPtr ASTDropFunctionQuery::clone() const void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : ""); - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION "; + if (if_exists) - settings.ostr << "IF EXISTS"; + settings.ostr << "IF EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? 
hilite_none : ""); } } From 367e58357ad6c7e286860640ec02161f3e338801 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 20 Oct 2021 13:43:39 +0000 Subject: [PATCH 236/238] Better test --- .../0_stateless/01236_graphite_mt.reference | 344 ++++++++++++++++++ .../queries/0_stateless/01236_graphite_mt.sql | 4 + 2 files changed, 348 insertions(+) diff --git a/tests/queries/0_stateless/01236_graphite_mt.reference b/tests/queries/0_stateless/01236_graphite_mt.reference index a30d2495265..0f2e8e81377 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.reference +++ b/tests/queries/0_stateless/01236_graphite_mt.reference @@ -342,3 +342,347 @@ 2 sum_2 98950 1 940 2 sum_2 108950 1 1040 2 sum_2 70170 1 1140 +1 max_1 9 1 0 +1 max_1 19 1 10 +1 max_1 29 1 20 +1 max_1 39 1 30 +1 max_1 49 1 40 +1 max_1 59 1 50 +1 max_1 69 1 60 +1 max_1 79 1 70 +1 max_1 89 1 80 +1 max_1 99 1 90 +1 max_1 109 1 100 +1 max_1 119 1 110 +1 max_1 129 1 120 +1 max_1 139 1 130 +1 max_1 149 1 140 +1 max_1 159 1 150 +1 max_1 169 1 160 +1 max_1 179 1 170 +1 max_1 189 1 180 +1 max_1 199 1 190 +1 max_1 209 1 200 +1 max_1 219 1 210 +1 max_1 229 1 220 +1 max_1 239 1 230 +1 max_1 249 1 240 +1 max_1 259 1 250 +1 max_1 269 1 260 +1 max_1 279 1 270 +1 max_1 289 1 280 +1 max_1 299 1 290 +1 max_1 39 1 0 +1 max_1 139 1 40 +1 max_1 239 1 140 +1 max_1 339 1 240 +1 max_1 439 1 340 +1 max_1 539 1 440 +1 max_1 639 1 540 +1 max_1 739 1 640 +1 max_1 839 1 740 +1 max_1 939 1 840 +1 max_1 1039 1 940 +1 max_1 1139 1 1040 +1 max_1 1199 1 1140 +1 max_2 9 1 0 +1 max_2 19 1 10 +1 max_2 29 1 20 +1 max_2 39 1 30 +1 max_2 49 1 40 +1 max_2 59 1 50 +1 max_2 69 1 60 +1 max_2 79 1 70 +1 max_2 89 1 80 +1 max_2 99 1 90 +1 max_2 109 1 100 +1 max_2 119 1 110 +1 max_2 129 1 120 +1 max_2 139 1 130 +1 max_2 149 1 140 +1 max_2 159 1 150 +1 max_2 169 1 160 +1 max_2 179 1 170 +1 max_2 189 1 180 +1 max_2 199 1 190 +1 max_2 209 1 200 +1 max_2 219 1 210 +1 max_2 229 1 220 +1 max_2 239 1 230 +1 max_2 249 1 240 +1 max_2 259 1 250 +1 max_2 269 1 260 +1 max_2 279 1 270 +1 max_2 289 1 280 +1 max_2 299 1 290 +1 max_2 39 1 0 +1 max_2 139 1 40 +1 max_2 239 1 140 +1 max_2 339 1 240 +1 max_2 439 1 340 +1 max_2 539 1 440 +1 max_2 639 1 540 +1 max_2 739 1 640 +1 max_2 839 1 740 +1 max_2 939 1 840 +1 max_2 1039 1 940 +1 max_2 1139 1 1040 +1 max_2 1199 1 1140 +1 sum_1 45 1 0 +1 sum_1 145 1 10 +1 sum_1 245 1 20 +1 sum_1 345 1 30 +1 sum_1 445 1 40 +1 sum_1 545 1 50 +1 sum_1 645 1 60 +1 sum_1 745 1 70 +1 sum_1 845 1 80 +1 sum_1 945 1 90 +1 sum_1 1045 1 100 +1 sum_1 1145 1 110 +1 sum_1 1245 1 120 +1 sum_1 1345 1 130 +1 sum_1 1445 1 140 +1 sum_1 1545 1 150 +1 sum_1 1645 1 160 +1 sum_1 1745 1 170 +1 sum_1 1845 1 180 +1 sum_1 1945 1 190 +1 sum_1 2045 1 200 +1 sum_1 2145 1 210 +1 sum_1 2245 1 220 +1 sum_1 2345 1 230 +1 sum_1 2445 1 240 +1 sum_1 2545 1 250 +1 sum_1 2645 1 260 +1 sum_1 2745 1 270 +1 sum_1 2845 1 280 +1 sum_1 2945 1 290 +1 sum_1 780 1 0 +1 sum_1 8950 1 40 +1 sum_1 18950 1 140 +1 sum_1 28950 1 240 +1 sum_1 38950 1 340 +1 sum_1 48950 1 440 +1 sum_1 58950 1 540 +1 sum_1 68950 1 640 +1 sum_1 78950 1 740 +1 sum_1 88950 1 840 +1 sum_1 98950 1 940 +1 sum_1 108950 1 1040 +1 sum_1 70170 1 1140 +1 sum_2 45 1 0 +1 sum_2 145 1 10 +1 sum_2 245 1 20 +1 sum_2 345 1 30 +1 sum_2 445 1 40 +1 sum_2 545 1 50 +1 sum_2 645 1 60 +1 sum_2 745 1 70 +1 sum_2 845 1 80 +1 sum_2 945 1 90 +1 sum_2 1045 1 100 +1 sum_2 1145 1 110 +1 sum_2 1245 1 120 +1 sum_2 1345 1 130 +1 sum_2 1445 1 140 +1 sum_2 1545 1 150 +1 sum_2 1645 1 160 +1 sum_2 1745 1 170 +1 sum_2 1845 1 180 +1 sum_2 1945 1 190 +1 sum_2 2045 1 
200 +1 sum_2 2145 1 210 +1 sum_2 2245 1 220 +1 sum_2 2345 1 230 +1 sum_2 2445 1 240 +1 sum_2 2545 1 250 +1 sum_2 2645 1 260 +1 sum_2 2745 1 270 +1 sum_2 2845 1 280 +1 sum_2 2945 1 290 +1 sum_2 780 1 0 +1 sum_2 8950 1 40 +1 sum_2 18950 1 140 +1 sum_2 28950 1 240 +1 sum_2 38950 1 340 +1 sum_2 48950 1 440 +1 sum_2 58950 1 540 +1 sum_2 68950 1 640 +1 sum_2 78950 1 740 +1 sum_2 88950 1 840 +1 sum_2 98950 1 940 +1 sum_2 108950 1 1040 +1 sum_2 70170 1 1140 +2 max_1 9 1 0 +2 max_1 19 1 10 +2 max_1 29 1 20 +2 max_1 39 1 30 +2 max_1 49 1 40 +2 max_1 59 1 50 +2 max_1 69 1 60 +2 max_1 79 1 70 +2 max_1 89 1 80 +2 max_1 99 1 90 +2 max_1 109 1 100 +2 max_1 119 1 110 +2 max_1 129 1 120 +2 max_1 139 1 130 +2 max_1 149 1 140 +2 max_1 159 1 150 +2 max_1 169 1 160 +2 max_1 179 1 170 +2 max_1 189 1 180 +2 max_1 199 1 190 +2 max_1 209 1 200 +2 max_1 219 1 210 +2 max_1 229 1 220 +2 max_1 239 1 230 +2 max_1 249 1 240 +2 max_1 259 1 250 +2 max_1 269 1 260 +2 max_1 279 1 270 +2 max_1 289 1 280 +2 max_1 299 1 290 +2 max_1 39 1 0 +2 max_1 139 1 40 +2 max_1 239 1 140 +2 max_1 339 1 240 +2 max_1 439 1 340 +2 max_1 539 1 440 +2 max_1 639 1 540 +2 max_1 739 1 640 +2 max_1 839 1 740 +2 max_1 939 1 840 +2 max_1 1039 1 940 +2 max_1 1139 1 1040 +2 max_1 1199 1 1140 +2 max_2 9 1 0 +2 max_2 19 1 10 +2 max_2 29 1 20 +2 max_2 39 1 30 +2 max_2 49 1 40 +2 max_2 59 1 50 +2 max_2 69 1 60 +2 max_2 79 1 70 +2 max_2 89 1 80 +2 max_2 99 1 90 +2 max_2 109 1 100 +2 max_2 119 1 110 +2 max_2 129 1 120 +2 max_2 139 1 130 +2 max_2 149 1 140 +2 max_2 159 1 150 +2 max_2 169 1 160 +2 max_2 179 1 170 +2 max_2 189 1 180 +2 max_2 199 1 190 +2 max_2 209 1 200 +2 max_2 219 1 210 +2 max_2 229 1 220 +2 max_2 239 1 230 +2 max_2 249 1 240 +2 max_2 259 1 250 +2 max_2 269 1 260 +2 max_2 279 1 270 +2 max_2 289 1 280 +2 max_2 299 1 290 +2 max_2 39 1 0 +2 max_2 139 1 40 +2 max_2 239 1 140 +2 max_2 339 1 240 +2 max_2 439 1 340 +2 max_2 539 1 440 +2 max_2 639 1 540 +2 max_2 739 1 640 +2 max_2 839 1 740 +2 max_2 939 1 840 +2 max_2 1039 1 940 +2 max_2 1139 1 1040 +2 max_2 1199 1 1140 +2 sum_1 45 1 0 +2 sum_1 145 1 10 +2 sum_1 245 1 20 +2 sum_1 345 1 30 +2 sum_1 445 1 40 +2 sum_1 545 1 50 +2 sum_1 645 1 60 +2 sum_1 745 1 70 +2 sum_1 845 1 80 +2 sum_1 945 1 90 +2 sum_1 1045 1 100 +2 sum_1 1145 1 110 +2 sum_1 1245 1 120 +2 sum_1 1345 1 130 +2 sum_1 1445 1 140 +2 sum_1 1545 1 150 +2 sum_1 1645 1 160 +2 sum_1 1745 1 170 +2 sum_1 1845 1 180 +2 sum_1 1945 1 190 +2 sum_1 2045 1 200 +2 sum_1 2145 1 210 +2 sum_1 2245 1 220 +2 sum_1 2345 1 230 +2 sum_1 2445 1 240 +2 sum_1 2545 1 250 +2 sum_1 2645 1 260 +2 sum_1 2745 1 270 +2 sum_1 2845 1 280 +2 sum_1 2945 1 290 +2 sum_1 780 1 0 +2 sum_1 8950 1 40 +2 sum_1 18950 1 140 +2 sum_1 28950 1 240 +2 sum_1 38950 1 340 +2 sum_1 48950 1 440 +2 sum_1 58950 1 540 +2 sum_1 68950 1 640 +2 sum_1 78950 1 740 +2 sum_1 88950 1 840 +2 sum_1 98950 1 940 +2 sum_1 108950 1 1040 +2 sum_1 70170 1 1140 +2 sum_2 45 1 0 +2 sum_2 145 1 10 +2 sum_2 245 1 20 +2 sum_2 345 1 30 +2 sum_2 445 1 40 +2 sum_2 545 1 50 +2 sum_2 645 1 60 +2 sum_2 745 1 70 +2 sum_2 845 1 80 +2 sum_2 945 1 90 +2 sum_2 1045 1 100 +2 sum_2 1145 1 110 +2 sum_2 1245 1 120 +2 sum_2 1345 1 130 +2 sum_2 1445 1 140 +2 sum_2 1545 1 150 +2 sum_2 1645 1 160 +2 sum_2 1745 1 170 +2 sum_2 1845 1 180 +2 sum_2 1945 1 190 +2 sum_2 2045 1 200 +2 sum_2 2145 1 210 +2 sum_2 2245 1 220 +2 sum_2 2345 1 230 +2 sum_2 2445 1 240 +2 sum_2 2545 1 250 +2 sum_2 2645 1 260 +2 sum_2 2745 1 270 +2 sum_2 2845 1 280 +2 sum_2 2945 1 290 +2 sum_2 780 1 0 +2 sum_2 8950 1 40 +2 sum_2 18950 1 140 +2 sum_2 28950 1 240 +2 
sum_2 38950 1 340 +2 sum_2 48950 1 440 +2 sum_2 58950 1 540 +2 sum_2 68950 1 640 +2 sum_2 78950 1 740 +2 sum_2 88950 1 840 +2 sum_2 98950 1 940 +2 sum_2 108950 1 1040 +2 sum_2 70170 1 1140 diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index 1d531f88ecb..0ec905fa0a8 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -34,4 +34,8 @@ WITH dates AS select key, Path, Value, Version, col from test_graphite final order by key, Path, Time desc; +optimize table test_graphite final; + +select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc; + drop table test_graphite; From f47aec6751c830f9d3f8a099d813e9225646a143 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 17:52:28 +0300 Subject: [PATCH 237/238] Fixed tests --- tests/queries/0_stateless/01856_create_function.sql | 1 - .../02098_sql_user_defined_functions_aliases.reference | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01856_create_function.sql b/tests/queries/0_stateless/01856_create_function.sql index 10f87f4a3df..cdc4baad1af 100644 --- a/tests/queries/0_stateless/01856_create_function.sql +++ b/tests/queries/0_stateless/01856_create_function.sql @@ -4,7 +4,6 @@ CREATE FUNCTION 01856_test_function_0 AS (a, b, c) -> a * b * c; SELECT 01856_test_function_0(2, 3, 4); SELECT isConstant(01856_test_function_0(1, 2, 3)); DROP FUNCTION 01856_test_function_0; -CREATE FUNCTION 01856_test_function_1 AS (a, b) -> a || b || c; --{serverError 47} CREATE FUNCTION 01856_test_function_1 AS (a, b) -> 01856_test_function_1(a, b) + 01856_test_function_1(a, b); --{serverError 611} CREATE FUNCTION cast AS a -> a + 1; --{serverError 609} CREATE FUNCTION sum AS (a, b) -> a + b; --{serverError 609} diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference index 8ab2f6d0ac6..45a4fb75db8 100644 --- a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference @@ -1 +1 @@ -8 4 +8 From ff48017f4a35ee222f34d88f25df78eedf30f322 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 16:04:02 +0300 Subject: [PATCH 238/238] SQLUserDefinedFunctions support CREATE OR REPLACE, CREATE IF NOT EXISTS --- .../InterpreterCreateFunctionQuery.cpp | 20 +++++++++++++++---- src/Interpreters/InterpreterFactory.cpp | 2 +- .../UserDefinedSQLFunctionFactory.cpp | 16 ++++++++++----- .../UserDefinedSQLFunctionFactory.h | 12 ++++++++++- .../UserDefinedSQLObjectsLoader.cpp | 10 +++++----- .../UserDefinedSQLObjectsLoader.h | 2 +- src/Parsers/ASTCreateFunctionQuery.cpp | 13 +++++++++++- src/Parsers/ASTCreateFunctionQuery.h | 3 +++ src/Parsers/ParserCreateFunctionQuery.cpp | 17 +++++++++++++++- ...ined_functions_create_or_replace.reference | 4 ++++ ...er_defined_functions_create_or_replace.sql | 13 ++++++++++++ ...d_functions_create_if_not_exists.reference | 1 + ...defined_functions_create_if_not_exists.sql | 8 ++++++++ 13 files changed, 102 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql create mode 100644 
tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference create mode 100644 tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ccb5f4040ec..39fec4a941c 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -31,20 +31,32 @@ BlockIO InterpreterCreateFunctionQuery::execute() if (!create_function_query) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected CREATE FUNCTION query"); + auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance(); + auto & function_name = create_function_query->function_name; + + bool if_not_exists = create_function_query->if_not_exists; + bool replace = create_function_query->or_replace; + + create_function_query->if_not_exists = false; + create_function_query->or_replace = false; + + if (if_not_exists && user_defined_function_factory.tryGet(function_name) != nullptr) + return {}; + validateFunction(create_function_query->function_core, function_name); - UserDefinedSQLFunctionFactory::instance().registerFunction(function_name, query_ptr); + user_defined_function_factory.registerFunction(function_name, query_ptr, replace); - if (!persist_function) + if (persist_function) { try { - UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr); + UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr, replace); } catch (Exception & exception) { - UserDefinedSQLFunctionFactory::instance().unregisterFunction(function_name); + user_defined_function_factory.unregisterFunction(function_name); exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); throw; } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 54307ae848b..fcf5f19aef6 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -278,7 +278,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else if (query->as()) { - return std::make_unique(query, context, false /*is_internal*/); + return std::make_unique(query, context, true /*persist_function*/); } else if (query->as()) { diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp index 1d2a80305c6..f036741ca21 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp @@ -19,7 +19,7 @@ UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() return result; } -void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query) +void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query, bool replace) { if (FunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); @@ -29,11 +29,17 @@ void UserDefinedSQLFunctionFactory::registerFunction(const String & function_nam std::lock_guard lock(mutex); - auto [_, inserted] = function_name_to_create_query.emplace(function_name, std::move(create_function_query)); + auto [it, inserted] = 
function_name_to_create_query.emplace(function_name, create_function_query); + if (!inserted) - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, - "The function name '{}' is not unique", - function_name); + { + if (replace) + it->second = std::move(create_function_query); + else + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, + "The function name '{}' is not unique", + function_name); + } } void UserDefinedSQLFunctionFactory::unregisterFunction(const String & function_name) diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h index 6838c2f9892..6487b951705 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.h @@ -10,21 +10,31 @@ namespace DB { +/// Factory for SQLUserDefinedFunctions class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> { public: static UserDefinedSQLFunctionFactory & instance(); - void registerFunction(const String & function_name, ASTPtr create_function_query); + /** Register function for function_name in factory for specified create_function_query. + * If replace = true and function with function_name already exists replace it with create_function_query. + * Otherwise throws exception. + */ + void registerFunction(const String & function_name, ASTPtr create_function_query, bool replace); + /// Unregister function for function_name void unregisterFunction(const String & function_name); + /// Get function create query for function_name. If no function registered with function_name throws exception. ASTPtr get(const String & function_name) const; + /// Get function create query for function_name. If no function registered with function_name return nullptr. ASTPtr tryGet(const String & function_name) const; + /// Check if function with function_name registered. bool has(const String & function_name) const; + /// Get all user defined functions registered names. 
diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp
index e4eb97f3002..a71f1f0799c 100644
--- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp
+++ b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp
@@ -69,7 +69,7 @@ void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, User
                     0,
                     context->getSettingsRef().max_parser_depth);
 
-                InterpreterCreateFunctionQuery interpreter(ast, context, true /*is internal*/);
+                InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/);
                 interpreter.execute();
             }
         }
@@ -111,7 +111,7 @@ void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context)
     }
 }
 
-void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast)
+void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace)
 {
     if (unlikely(!enable_persistence))
         return;
@@ -127,7 +127,7 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL
         }
     }
 
-    if (std::filesystem::exists(file_path))
+    if (!replace && std::filesystem::exists(file_path))
         throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path));
 
     LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path);
@@ -135,9 +135,9 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL
     WriteBufferFromOwnString create_statement_buf;
     formatAST(ast, create_statement_buf, false);
     writeChar('\n', create_statement_buf);
-
     String create_statement = create_statement_buf.str();
-    WriteBufferFromFile out(file_path, create_statement.size(), O_WRONLY | O_CREAT | O_EXCL);
+
+    WriteBufferFromFile out(file_path, create_statement.size());
     writeString(create_statement, out);
     out.next();
     if (context->getSettingsRef().fsync_metadata)
diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.h b/src/Interpreters/UserDefinedSQLObjectsLoader.h
index 17493933f21..2e747f67a8d 100644
--- a/src/Interpreters/UserDefinedSQLObjectsLoader.h
+++ b/src/Interpreters/UserDefinedSQLObjectsLoader.h
@@ -21,7 +21,7 @@ public:
     UserDefinedSQLObjectsLoader();
 
     void loadObjects(ContextPtr context);
-    void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast);
+    void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace);
     void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name);
 
     /// For ClickHouse local if path is not set we can disable loader.
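On the persistence side, overwriting is enabled twice over: storeObject() skips the already-stored-on-disk check when `replace` is set, and the output file is no longer opened with O_EXCL, so an existing file can be truncated instead of making the open fail. A small POSIX sketch of the difference (the path is hypothetical):

```cpp
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

int main()
{
    const char * path = "/tmp/udf_store_demo.sql";  // hypothetical demo path

    // Create-only: O_EXCL makes the second open() fail with EEXIST, which is
    // why the old storeObject() could never overwrite a stored function.
    int fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (fd >= 0)
        close(fd);
    fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (fd < 0)
        perror("create-only reopen");  // prints "File exists"

    // Create-or-truncate: succeeds whether or not the file exists, which is
    // the behaviour CREATE OR REPLACE FUNCTION needs.
    fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd >= 0)
        close(fd);

    unlink(path);
    return 0;
}
```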
diff --git a/src/Parsers/ASTCreateFunctionQuery.cpp b/src/Parsers/ASTCreateFunctionQuery.cpp
index 0b3991ddc44..4e1e7de660d 100644
--- a/src/Parsers/ASTCreateFunctionQuery.cpp
+++ b/src/Parsers/ASTCreateFunctionQuery.cpp
@@ -12,7 +12,18 @@ ASTPtr ASTCreateFunctionQuery::clone() const
 
 void ASTCreateFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame) const
 {
-    settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE FUNCTION " << (settings.hilite ? hilite_none : "");
+    settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE ";
+
+    if (or_replace)
+        settings.ostr << "OR REPLACE ";
+
+    settings.ostr << "FUNCTION ";
+
+    if (if_not_exists)
+        settings.ostr << "IF NOT EXISTS ";
+
+    settings.ostr << (settings.hilite ? hilite_none : "");
+
     settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : "");
     settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "");
     function_core->formatImpl(settings, state, frame);
diff --git a/src/Parsers/ASTCreateFunctionQuery.h b/src/Parsers/ASTCreateFunctionQuery.h
index 3adddad8fbd..a58fe64c435 100644
--- a/src/Parsers/ASTCreateFunctionQuery.h
+++ b/src/Parsers/ASTCreateFunctionQuery.h
@@ -12,6 +12,9 @@ public:
     String function_name;
     ASTPtr function_core;
 
+    bool or_replace = false;
+    bool if_not_exists = false;
+
     String getID(char) const override { return "CreateFunctionQuery"; }
 
     ASTPtr clone() const override;
diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp
index fbfd02415e7..5d84b6bc2dc 100644
--- a/src/Parsers/ParserCreateFunctionQuery.cpp
+++ b/src/Parsers/ParserCreateFunctionQuery.cpp
@@ -1,10 +1,12 @@
+#include <Parsers/ParserCreateFunctionQuery.h>
+
 #include <Parsers/ASTCreateFunctionQuery.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/CommonParsers.h>
 #include <Parsers/ExpressionElementParsers.h>
 #include <Parsers/ExpressionListParsers.h>
-#include <Parsers/ParserCreateFunctionQuery.h>
+
 
 namespace DB
 {
@@ -13,6 +15,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp
 {
     ParserKeyword s_create("CREATE");
     ParserKeyword s_function("FUNCTION");
+    ParserKeyword s_or_replace("OR REPLACE");
+    ParserKeyword s_if_not_exists("IF NOT EXISTS");
     ParserIdentifier function_name_p;
     ParserKeyword s_as("AS");
     ParserLambdaExpression lambda_p;
@@ -20,12 +24,21 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp
     ASTPtr function_name;
     ASTPtr function_core;
 
+    bool or_replace = false;
+    bool if_not_exists = false;
+
     if (!s_create.ignore(pos, expected))
         return false;
 
+    if (s_or_replace.ignore(pos, expected))
+        or_replace = true;
+
     if (!s_function.ignore(pos, expected))
         return false;
 
+    if (!or_replace && s_if_not_exists.ignore(pos, expected))
+        if_not_exists = true;
+
     if (!function_name_p.parse(pos, function_name, expected))
         return false;
 
@@ -40,6 +53,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp
     create_function_query->function_name = function_name->as<ASTIdentifier &>().name();
     create_function_query->function_core = function_core;
+    create_function_query->or_replace = or_replace;
+    create_function_query->if_not_exists = if_not_exists;
 
     return true;
 }
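With the AST fields, formatter, and parser in place, the grammar becomes CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] name AS lambda, and the two modifiers are mutually exclusive because IF NOT EXISTS is only attempted when OR REPLACE was absent. A toy mirror of the formatImpl() branches above, with hilite handling dropped and invented function names:

```cpp
#include <iostream>
#include <string>

// Toy mirror of ASTCreateFunctionQuery::formatImpl (hilite escapes omitted).
std::string formatCreateFunction(
    const std::string & function_name,
    const std::string & function_core,
    bool or_replace,
    bool if_not_exists)
{
    std::string out = "CREATE ";
    if (or_replace)
        out += "OR REPLACE ";
    out += "FUNCTION ";
    if (if_not_exists)
        out += "IF NOT EXISTS ";
    return out + function_name + " AS " + function_core;
}

int main()
{
    std::cout << formatCreateFunction("linear", "(x, k, b) -> ((k * x) + b)", true, false) << '\n';
    std::cout << formatCreateFunction("plus_one", "x -> (x + 1)", false, true) << '\n';
    // CREATE OR REPLACE FUNCTION linear AS (x, k, b) -> ((k * x) + b)
    // CREATE FUNCTION IF NOT EXISTS plus_one AS x -> (x + 1)
}
```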
diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference
new file mode 100644
index 00000000000..437cc81afba
--- /dev/null
+++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference
@@ -0,0 +1,4 @@
+CREATE FUNCTION `02101_test_function` AS x -> (x + 1)
+2
+CREATE FUNCTION `02101_test_function` AS x -> (x + 2)
+3
diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql
new file mode 100644
index 00000000000..7b0ad311bd4
--- /dev/null
+++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql
@@ -0,0 +1,13 @@
+-- Tags: no-parallel
+
+CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 1;
+
+SELECT create_query FROM system.functions WHERE name = '02101_test_function';
+SELECT 02101_test_function(1);
+
+CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 2;
+
+SELECT create_query FROM system.functions WHERE name = '02101_test_function';
+SELECT 02101_test_function(1);
+
+DROP FUNCTION 02101_test_function;
diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference
new file mode 100644
index 00000000000..0cfbf08886f
--- /dev/null
+++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference
@@ -0,0 +1 @@
+2
diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql
new file mode 100644
index 00000000000..092fa660cb0
--- /dev/null
+++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql
@@ -0,0 +1,8 @@
+-- Tags: no-parallel
+
+CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1;
+SELECT 02102_test_function(1);
+
+CREATE FUNCTION 02102_test_function AS x -> x + 1; --{serverError 609}
+CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1;
+DROP FUNCTION 02102_test_function;